{
 "nbformat": 4,
 "nbformat_minor": 2,
 "metadata": {
  "language_info": {
   "name": "python",
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "version": "3.8.1-final"
  },
  "orig_nbformat": 2,
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "npconvert_exporter": "python",
  "pygments_lexer": "ipython3",
  "version": 3,
  "kernelspec": {
   "name": "python38164bit94595e38455f4cc1b10edf32f50056fb",
   "display_name": "Python 3.8.1 64-bit"
  }
 },
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_rating = pd.read_csv('rating.csv', usecols=[0,1,2])\n",
    "df_tag = pd.read_csv('tag.csv', usecols=[0,1,2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>2</td>\n      <td>3.5</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>29</td>\n      <td>3.5</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>32</td>\n      <td>3.5</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>47</td>\n      <td>3.5</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>50</td>\n      <td>3.5</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>20000258</th>\n      <td>138493</td>\n      <td>68954</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>20000259</th>\n      <td>138493</td>\n      <td>69526</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>20000260</th>\n      <td>138493</td>\n      <td>69644</td>\n      <td>3.0</td>\n    </tr>\n    <tr>\n      <th>20000261</th>\n      <td>138493</td>\n      <td>70286</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>20000262</th>\n      <td>138493</td>\n      <td>71619</td>\n      <td>2.5</td>\n    </tr>\n  </tbody>\n</table>\n<p>20000263 rows × 3 columns</p>\n</div>",
      "text/plain": "          userId  movieId  rating\n0              1        2     3.5\n1              1       29     3.5\n2              1       32     3.5\n3              1       47     3.5\n4              1       50     3.5\n...          ...      ...     ...\n20000258  138493    68954     4.5\n20000259  138493    69526     4.5\n20000260  138493    69644     3.0\n20000261  138493    70286     5.0\n20000262  138493    71619     2.5\n\n[20000263 rows x 3 columns]"
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 20000263 entries, 0 to 20000262\nData columns (total 3 columns):\nuserId     int64\nmovieId    int64\nrating     float64\ndtypes: float64(1), int64(2)\nmemory usage: 457.8 MB\n"
    }
   ],
   "source": [
    "df_rating.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 465564 entries, 0 to 465563\nData columns (total 3 columns):\nuserId     465564 non-null int64\nmovieId    465564 non-null int64\ntag        465548 non-null object\ndtypes: int64(2), object(1)\nmemory usage: 10.7+ MB\n"
    }
   ],
   "source": [
    "df_tag.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>tag</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>18</td>\n      <td>4141</td>\n      <td>Mark Waters</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>65</td>\n      <td>208</td>\n      <td>dark hero</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>65</td>\n      <td>353</td>\n      <td>dark hero</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>65</td>\n      <td>521</td>\n      <td>noir thriller</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>65</td>\n      <td>592</td>\n      <td>dark hero</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>465559</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>dragged</td>\n    </tr>\n    <tr>\n      <th>465560</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>Jason Bateman</td>\n    </tr>\n    <tr>\n      <th>465561</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>quirky</td>\n    </tr>\n    <tr>\n      <th>465562</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>sad</td>\n    </tr>\n    <tr>\n      <th>465563</th>\n      <td>138472</td>\n      <td>923</td>\n      <td>rise to power</td>\n    </tr>\n  </tbody>\n</table>\n<p>465564 rows × 3 columns</p>\n</div>",
      "text/plain": "        userId  movieId            tag\n0           18     4141    Mark Waters\n1           65      208      dark hero\n2           65      353      dark hero\n3           65      521  noir thriller\n4           65      592      dark hero\n...        ...      ...            ...\n465559  138446    55999        dragged\n465560  138446    55999  Jason Bateman\n465561  138446    55999         quirky\n465562  138446    55999            sad\n465563  138472      923  rise to power\n\n[465564 rows x 3 columns]"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "38644"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_tag.tag.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "19545"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df_tag.movieId.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "{'coming-of-age',\n 'metaphysics',\n 'Liv Tyler',\n 'smart comedy',\n 'Daniel Radcliffe',\n 'cars',\n 'romantic',\n 'Jack Lemmon',\n 'Mark Strong',\n 'SUPERNATURAL ROMANCE',\n 'Gus Van Sant',\n 'Danny DeVito',\n 'based on a true story',\n 'destiny',\n 'royalty',\n 'Strong Women',\n 'abortion',\n 'distorted reality',\n 'National Lampoon',\n 'stupid stereotypes',\n 'remake',\n 'hospital',\n 'to see: b-grade horror',\n 'catastrophe',\n 'sci fi',\n 'modern fantasy',\n 'irish accent',\n 'Marilyn Monroe',\n \"don't remember\",\n 'propaganda',\n 'Patrick Swayze',\n 'terrible',\n 'telekinesis',\n 'Steve Buscemi',\n 'shallow',\n 'artificial intelligence',\n 'watch the credits',\n 'Edward Norton',\n '2.5',\n 'family gatherings',\n 'Surreal',\n 'beer',\n 'good music',\n 'David Cross',\n 'twisted ending',\n 'Audrey Tautou',\n 'Michael J. Fox',\n 'nuns',\n 'Will Ferrell',\n 'Patton Oswalt',\n 'con men',\n 'Steve Carell',\n 'Academy Award Nominee',\n 'vampire',\n 'fast paced',\n 'Seth Rogen',\n 'Highly quotable',\n 'identity',\n 'James McAvoy',\n 'gadgets',\n 'complex',\n 'stunning',\n 'airplane',\n 'Jeremy Irons',\n 'black and white',\n 'polyamory',\n 'playwright:Shakespeare',\n 'Katherine Heigl',\n 'PG13',\n 'economics',\n \"Peter O'Toole\",\n 'Oscar (Best Actress)',\n 'non-hollywood ending',\n 'new york city',\n 'Christoph Waltz',\n 'Nudity (Rear)',\n 'Tim Roth',\n 'alone',\n 'Denzel Washington',\n 'self-sacrifice',\n 'Matthew McConaughey',\n 'great music',\n 'cliched',\n 'classical',\n 'cliche',\n 'actors',\n 'good ending',\n 'mad scientist',\n 'priest',\n 'realistic',\n 'spy',\n 'mother-son relationship',\n 'Iceland',\n 'Michael Winterbottom',\n 'sunny',\n 'Joel Schumacher',\n 'East Germany',\n 'Los Angeles',\n 'Gay Lead Character',\n 'Firefly',\n 'pointless',\n 'Juliette Lewis',\n 'Alfred Hitchcock',\n 'Kieran Culkin',\n 'underrated',\n 'rotoscoping',\n 'ambition',\n 'FBI',\n 'Berlin',\n 'almost favorite',\n 'books',\n 'Drew Barrymore',\n 'Nudity (Full 
Frontal)',\n 'wormhole',\n 'Bela Lugosi',\n 'Black comedy',\n 'Thriller',\n 'Drugs',\n 'want to see again',\n 'Memorable Characters',\n 'Love story',\n 'Almodovar',\n 'Incest',\n 'based on comic',\n 'Amy Smart',\n 'New Zealand',\n 'very good',\n 'road movie',\n 'watched 2006',\n 'Andy Samberg',\n 'sappy',\n 'cats',\n 'Rose Byrne',\n 'Sex Comedy',\n 'François Truffaut',\n 'infection',\n 'Western',\n 'loss',\n 'fascism',\n 'Hitchcock',\n 'Christianity',\n 'New Jersey',\n 'tragedy',\n '1',\n 'prostitution',\n 'communism',\n 'in netflix queue',\n 'paranormal',\n 'Tilda Swinton',\n 'incest',\n '12/11',\n 'unusual',\n 'Kristen Wiig',\n 'parenthood',\n 'Jennifer Lopez',\n 'Peter Jackson',\n 'crude',\n 'memories',\n 'Sacha Baron Cohen',\n 'devil',\n 'Bette Davis',\n 'hope',\n 'angel',\n '2014',\n 'Johnny Depp',\n 'history',\n 'airport',\n 'nuclear war',\n 'Marisa Tomei',\n 'cyberpunk',\n 'monster',\n 'wry',\n 'Soundtrack',\n 'derivative',\n 'china',\n 'president',\n '90s',\n 'thought provoking',\n 'love triangles',\n 'Tom Hanks',\n 'orphans',\n 'NO_FA_GANES',\n 'Jet Li',\n 'Tom Waits',\n 'Matt Damon',\n 'Kenneth Branagh',\n 'surrealism',\n 'gay romance',\n 'acting debut',\n 'Emily Mortimer',\n 'premonition',\n 'IMAX DMR 3-D',\n 'stephen king',\n 'break-up',\n 'Sigourney Weaver',\n 'unoriginal',\n 'Woody Harrelson',\n 'angry',\n 'plot twist',\n 'upbeat',\n 'rousing',\n 'ocean',\n 'Romance',\n 'Atheism',\n 'thought-provoking',\n 'Funny as hell',\n 'Myth',\n 'dreams',\n 'Sean Connery',\n 'boat',\n 'W.S. 
Van Dyke',\n 'Rutger Hauer',\n 'Neil Simon',\n 'Luke Wilson',\n 'Mike Nichols',\n 'SURVIVAL',\n 'Katie Holmes',\n 'Clea DuVall',\n 'PG',\n 'Jason Bateman',\n 'workplace',\n 'feminism',\n 'DVD-R',\n 'shark',\n 'Turkey',\n 'beautiful',\n 'weed',\n 'kids and family',\n 'Neil Gaiman',\n 'fake documentary',\n 'unlikeable characters',\n 'Philosophical',\n 'hackers',\n 'Renee Zellweger',\n 'Tim Allen',\n 'Jeff Bridges',\n 'cult classic',\n 'snow',\n 'free to download',\n 'Suicide',\n 'Monica Bellucci',\n 'Anna Faris',\n 'South Africa',\n 'Yasujirô Ozu',\n 'Italy',\n 'Christopher Plummer',\n 'few funny scenes',\n 'Frightening',\n '05/10',\n 'German',\n 'Amanda Seyfried',\n 'rats',\n 'extremely violent',\n 'Cate Blanchett',\n 'sword fight',\n 'shallow plot',\n 'geeks',\n 'Dr. Seuss',\n 'immortality',\n 'Rupert Grint',\n 'American Civil War',\n 'Seen 2009',\n 'gangster',\n 'pirates',\n 'History',\n 'Leslie Nielsen',\n 'Astaire and Rogers',\n 'Willem Dafoe',\n 'REDBOX',\n 'dwarf',\n 'biblical',\n 'Dennis Hopper',\n 'MTSKAF',\n 'jennifer aniston',\n 'genital mutilation',\n 'Julianne Moore',\n 'dvd-r',\n 'Oscar Winner',\n 'dialogue driven',\n 'spanish',\n 'Aardman',\n 'Leslie Mann',\n 'Ingrid Bergman',\n 'Don Cheadle',\n 'Chevy Chase',\n 'wizards',\n 'River Phoenix',\n 'Brad Pitt',\n 'Salma Hayek',\n 'unnecessary sequel',\n 'Godzilla',\n 'genocide',\n 'Hawaii',\n 'NASA',\n 'dated',\n 'giant monster',\n 'claustrophobic',\n '1980s',\n 'strange',\n 'Tarantino',\n 'Spaghetti Western',\n 'addiction',\n 'fantasy',\n 'Lance Henriksen',\n 'Mandy Moore',\n 'colourful',\n 'Bill Murray',\n 'physics',\n 'Bruce Campbell',\n 'Helen Mirren',\n 'distopia',\n 'teacher student relationship',\n 'pseudo-intelligent',\n 'Josh Hartnett',\n 'camerawork',\n 'bad',\n 'masterpiece',\n 'Gerard Butler',\n 'bad script',\n 'psychiatrist as protagonist',\n 'homage',\n 'Clarence Brown',\n 'overacting',\n 'ummarti2006',\n 'good dialogue',\n 'childish plot',\n 'Unexpected Ending',\n 'Luc Besson',\n 'murder 
mystery',\n 'nature',\n 'forceful',\n 'paris',\n 'to see: horror',\n 'misogyny',\n 'comic books',\n 'farce',\n 'superpowers',\n 'cave',\n \"Nostalgia Critic's Top 20\",\n 'Charlie Chaplin',\n 'shakespeare',\n 'small town',\n 'Shakespeare',\n '11/11',\n 'Ben Kingsley',\n 'seafaring',\n 'split personality',\n 'women',\n 'Oscar Nominee',\n 'japanese',\n 'scope',\n 'incoherent',\n 'author:Stephen King',\n 'Visuals',\n 'soundtrack',\n 'girlie movie',\n 'bland',\n 'INNOCENCE LOST',\n 'Catherine Keener',\n 'scandal',\n 'Jim carrey',\n 'Kevin Costner',\n \"Sven's to see list\",\n 'Paul Thomas Anderson',\n 'teleportation',\n 'Nudity (Full Frontal - Brief)',\n 'Epic',\n 'Heather Graham',\n 'multiple interpretations',\n 'surprise ending',\n 'animation',\n 'Forest Whitaker',\n 'made me cry',\n 'film noir',\n 'storm',\n 'my addition to ML',\n 'heroism',\n 'teen',\n 'Seth Green',\n 'naive',\n 'architecture',\n '100 Essential Female Performances',\n 'stoner comedy',\n 'train',\n 'amnesia',\n 'classical music',\n 'fugitive',\n 'period piece',\n 'scenic',\n 'campy',\n 'National Film Registry',\n 'matter-of-fact',\n 'tragic',\n 'dance',\n 'interesting characters',\n 'romantic comedy',\n 'Sarah Polley',\n 'Breathtaking',\n 'Zach Galifianakis',\n 'Matthew Broderick',\n 'Netflix Finland',\n 'little dialogue',\n 'Beatles',\n 'Jennifer Jason Leigh',\n 'Sidney Poitier',\n 'BEST PICTURE',\n 'BBC Films',\n 'lavish',\n 'WITCHCRAFT',\n 'French',\n 'shopping',\n 'Cult classic',\n 'punk',\n 'Andy Garcia',\n 'Joss Whedon',\n 'medicine',\n 'charming',\n 'Gulf War',\n 'Audrey Hepburn',\n 'holocaust',\n 'Ingmar Bergman',\n 'Emma Watson',\n 'Oscar Nominee: Director',\n 'italy',\n 'Netflix Streaming',\n 'black humour',\n 'weddings',\n 'heavy metal',\n 'Martin Sheen',\n 'Timothy Dalton',\n 'Farrelly Brothers',\n 'Jewish',\n 'cdon',\n 'ironic',\n 'updated classics',\n 'great',\n 'Soviet Union',\n 'Louis C.K.',\n 'Gary Oldman',\n 'Wizards',\n 'idiotic',\n 'Diane Keaton',\n 'Simon Pegg',\n 
'restaurant',\n 'Roald Dahl',\n 'Christian',\n 'brainwashing',\n 'The Chosen One',\n 'scifi',\n 'Michael Keaton',\n 'afternoon section',\n 'Nicolas Cage',\n 'Sci-fi',\n 'Handycam',\n 'Bruce Lee',\n '06/11',\n 'prison escape',\n 'non-linear',\n 'Stanley Kubrick',\n 'Jules Verne',\n 'Jake Gyllenhaal',\n 'Wes Anderson',\n 'characters',\n 'Alcatraz',\n 'TRAPPED OR CONFINED',\n 'Korean',\n 'Benicio Del Toro',\n 'Acting',\n 'biopic',\n 'Tom Cruise',\n 'creative',\n 'Henry King',\n 'unsimulated sex scenes',\n 'realistic action',\n 'brothers',\n 'Ian McKellen',\n 'Jim Jarmusch',\n 'Louisiana',\n 'animals',\n 'CIA',\n 'poor acting',\n 'folk music',\n 'Dakota Fanning',\n 'Steven Soderbergh',\n 'mutation',\n 'hilarious',\n 'easygoing',\n 'occult',\n 'eerie',\n 'funny moments',\n 'segregation',\n 'original',\n 'Robert Ludlum',\n 'Kick-Butt Women',\n 'oppl',\n 'Disturbing',\n 'Oscar Nom 2007',\n 'Elizabeth Banks',\n 'great acting',\n 'youtube',\n 'Music',\n 'Creature Feature',\n 'Argentina',\n 'deliberate',\n 'movielens top pick',\n 'violent',\n 'teen pregnancy',\n 'Helen Hunt',\n 'Anamorphic Blow-Up',\n 'visuals',\n 'Mary-Louise Parker',\n 'Animation',\n 'spaghetti western',\n 'divorce',\n 'coming of age',\n 'reviewed',\n 'lesbian subtext',\n 'Woody Allen',\n 'Civil War',\n 'Sam Mendes',\n 'Jean Renoir',\n 'Jennifer Lawrence',\n 'Michael Cera',\n 'writers',\n 'Golden Raspberry (Worst Actress)',\n 'FIRST LOVE',\n 'visually appealing',\n 'confusing plot',\n 'delights',\n 'well done',\n '3.5',\n 'prison',\n 'Humphrey Bogart',\n 'Ron Howard',\n 'library',\n 'seen 2012',\n 'lovecraftian',\n 'cat killing',\n 'arnold',\n 'The Avengers',\n 'downbeat',\n 'secret service',\n 'village',\n 'based on a video game',\n 'tattoo',\n 'car chase',\n 'sequel',\n 'Jean-Pierre Jeunet',\n 'neo-noir',\n 'Nick Nolte',\n 'spies',\n 'photography',\n '9/11',\n 'cerebral',\n 'Legenda PT-BR',\n 'episodic',\n 'mother-daughter relationships',\n 'Elizabeth Taylor',\n 'California',\n 'Own It',\n 'LOVE 
TRIANGLES',\n 'Frank Capra',\n 'Gene Hackman',\n 'Hunter S. Thompson',\n 'sexual abuse',\n 'gory',\n 'Japanese',\n 'star wars',\n 'John Cusack',\n 'Tommy Lee Jones',\n 'Futuristmovies.com',\n 'Period piece',\n 'Sherlock Holmes',\n 'Dario Argento',\n 'Liam Neeson',\n 'Kristen Bell',\n 'beautiful cinematography',\n 'virginity',\n 'John Candy',\n 'Rachel Weisz',\n 'Cary Elwes',\n 'macho',\n 'Chinese',\n 'father daughter relationship',\n 'Robert Rodriguez',\n 'AFI 100 (Laughs)',\n 'mystery',\n 'natural disaster',\n 'Charles Dickens',\n 'Francis Ford Coppola',\n 'husband-wife relationship',\n 'slavery',\n 'Christmas',\n 'robert de niro',\n 'odd',\n 'fanciful',\n 'visceral',\n 'special forces',\n 'stylish',\n 'street race',\n \"joseph campbell's study of mythology influenced\",\n 'Richard Dreyfuss',\n 'Lindsay Lohan',\n 'musicians',\n 'strong woman',\n 'rate down',\n 'great story',\n 'alternate timeline',\n 'old',\n 'animal attacks',\n 'etaege',\n 'Saoirse Ronan',\n 'ghost story',\n 'political corruption',\n 'Gross-out',\n 'Anna Kendrick',\n 'somber',\n 'memory loss',\n 'Zac Efron',\n 'Film Theory & Criticism',\n 'detective',\n 'classic comedy',\n 'foul language',\n 'Voodoo',\n 'Sofia Coppola',\n 'satirical',\n 'Notable Nudity',\n 'Olympics',\n 'convoluted',\n 'USA film registry',\n 'Adaptation',\n 'Bill Pullman',\n 'Science Fiction',\n 'Catchy Score',\n 'bromantic',\n 'sex',\n 'Vera Farmiga',\n 'Takeshi Kitano',\n 'obsession',\n 'wilderness',\n 'smuggling',\n 'Own',\n 'dystopia',\n 'costume drama',\n 'Julia Roberts',\n 'should like',\n 'CLASS DIFFERENCES',\n 'William Shatner',\n 'environmental',\n 'marijuana',\n 'alone in the world',\n 'chocolate',\n 'vigilante',\n 'Owen Wilson',\n 'Saturn Award (Best Science Fiction Film)',\n '70mm',\n 'weird',\n 'politics',\n 'BD-R',\n 'witches',\n 'Mike Leigh',\n 'computers',\n 'Broadway',\n 'Lovecraftian mythology',\n 'excellent acting',\n 'character driven',\n 'Kevin Smith',\n 'hell',\n 'boxing drama',\n 'Jews',\n 'chess',\n 
'William Wyler',\n 'election',\n 'dragon',\n 'Hugo Award',\n 'Danny McBride',\n 'Marvel Cinematic Universe',\n 'Danny Trejo',\n 'sex scene',\n 'Spanish',\n 'Ireland',\n 'Dwayne Johnson',\n 'AFI 100 (Thrills)',\n 'horror comedy',\n 'author:Jane Austen',\n 'experimental',\n 'college',\n 'nudity (topless - notable)',\n 'the book was better',\n 'nothing happens',\n 'dinosaurs',\n 'cowboys',\n 'colorful',\n 'imdb top 250',\n 'Judd Apatow',\n 'Children',\n 'James Gandolfini',\n 'graphic violence',\n 'Richard Gere',\n 'ummarti2007',\n 'sg',\n 'Michelle Pfeiffer',\n 'bullying',\n 'Paris',\n 'script',\n 'existentialism',\n 'John Ford',\n 'Paul Newman',\n 'ridiculous',\n 'crime gone awry',\n '03/09',\n 'childhood classics',\n 'Carl Sagan',\n 'William A. Wellman',\n 'seen 2011',\n 'fraternity',\n 'con artists',\n 'upper class',\n 'internet',\n 'Politics',\n 'Friday night movie',\n 'Kate Hudson',\n 'Sarah Michelle Gellar',\n 'clones',\n 'HAUNTED BY THE PAST',\n 'stunts',\n 'terrorism',\n 'biographical',\n \"Can't remember\",\n 'Gérard Depardieu',\n 'Edgar Rice Burroughs',\n 'food',\n 'Predictable',\n 'blind badass',\n 'sequels filmed simultaneously',\n 'sadism',\n 'Stupid as Hell',\n 'Nicholas Cage',\n 'Favorites',\n '17th century',\n 'human nature',\n 'cult film',\n '55 movies every kid should see--Entertainment Weekly',\n 'splatter',\n 'Madness',\n 'villain nonexistent or not needed for good story',\n 'Atmospheric',\n 'DVD',\n 'Kirsten Dunst',\n 'tongue-in-cheek',\n 'alternate reality',\n 'gangs',\n 'idealism',\n 'secrets',\n 'want to own',\n 'biology',\n 'punk rock',\n 'landscape',\n 'action',\n 'childhood',\n 'cinematography',\n 'Angelina Jolie',\n 'lovecraft',\n 'childish',\n 'redbox',\n 'silly',\n 'urbane',\n 'stupid ending',\n 'Animated',\n 'Ed Harris',\n 'bad science',\n 'Michel Gondry',\n 'Tobey Maguire',\n 'great dialogue',\n 'wistful',\n 'trash',\n 'football',\n 'Fred Zinnemann',\n 'spoof',\n 'Seen 2006',\n 'fairy tales',\n 'racing',\n 'existential',\n 'cartoon',\n 
'friendship',\n 'medieval',\n 'psychic powers',\n 'denzel washington',\n 'DVD Collection',\n 'Shirley Temple',\n 'war',\n 'screwball',\n 'Sam Neill',\n 'supernatural',\n 'Michael Haneke',\n 'historical epic',\n 'Rosamund Pike',\n 'Iron Man',\n 'dogs',\n 'Julie Delpy',\n 'coincidences',\n 'stop motion',\n 'USA',\n 'Chris Tucker',\n 'George Cukor',\n 'sacrifice',\n 'Michael Douglas',\n 'cast',\n 'court',\n 'Chow Yun Fat',\n 'innovative',\n 'mermaid',\n 'transformation',\n 'blood',\n 'deadpan',\n 'London',\n 'multiple storylines',\n 'Sci-Fi',\n 'Parallel universe',\n 'Cillian Murphy',\n 'awesome soundtrack',\n 'urban',\n 'political satire',\n 'surreal',\n 'France',\n 'fbi',\n 'music documentary',\n 'crap',\n 'Germany',\n 'singing',\n 'party',\n 'drama',\n 'Kathy Bates',\n 'Dreamworks',\n 'director-screenwriter',\n 'Ennio Morricone',\n 'dog killing',\n 'Jamie Lee Curtis',\n 'Leonardo DiCaprio',\n 'true story',\n 'directorial debut',\n \"Tumey's To See Again\",\n 'cate blanchett',\n 'morgan freeman',\n 'england',\n 'retro',\n 'injustice',\n 'magic',\n 'David Fincher',\n 'score',\n 'france',\n 'BORING!',\n 'great ending',\n 'John Grisham',\n 'Tim Burton',\n 'Dark',\n 'add to prospects list',\n 'treasure hunt',\n 'Golden Palm',\n 'ECCENTRIC FAMILIES',\n 'horses',\n 'Christopher Nolan',\n 'Oscar (Best Foreign Language Film)',\n 'lengthy',\n 'courtesan',\n 'hypnosis',\n 'British',\n 'Anne Hathaway',\n 'David Mamet',\n 'hip hop',\n 'on dvr',\n 'Oscar (Best Music - Original Score)',\n 'Family',\n 'artistic',\n 'Giallo',\n 'Psychopathy',\n 'Jay Baruchel',\n 'corporations',\n 'Oscar (Best Actor)',\n 'cross dressing women',\n 'nudity (rear)',\n 'alcoholism',\n 'mother daughter relationship',\n '(s)vcd',\n 'Leonard Nimoy',\n 'exploitation',\n 'circus',\n 'Death',\n 'Pedro Almodovar',\n 'Clint Eastwood',\n 'Beautiful Woman',\n 'magic realism',\n 'double life',\n 'pixar',\n 'John Huston',\n 'owned',\n '70s',\n 'George A. 
Romero',\n 'James Cagney',\n 'Gary Cooper',\n 'too short',\n 'kung fu',\n 'Dolph Lundgren',\n 'disease',\n 'disaster',\n 'based on a TV show',\n 'pornography',\n 'Donald Sutherland',\n 'Natalie Portman',\n 'Emily Blunt',\n 'Timothy Olyphant',\n 'Wixom Library',\n 'Underrated',\n 'Martial Arts',\n 'Charles Bronson',\n 'Tim Curry',\n 'getdvd',\n 'cameos',\n 'Halle Berry',\n 'Jeff Daniels',\n 'predictable ending',\n 'K movie',\n 'ohsoso',\n 'humour',\n 'Marx Brothers',\n 'joss whedon',\n 'David Lean',\n 'twins',\n 'gratuitous violence',\n 'Funniest Movies',\n 'middle east',\n 'found footage',\n 'Remake',\n 'creepy',\n 'wwii',\n 'Ben Affleck',\n 'gothic',\n 'Kate',\n \"so bad it's almost good\",\n 'Mindfuck',\n 'homophobia',\n \"Vincent D'Onofrio\",\n 'super-hero',\n 'nuclear bomb',\n 'bombs',\n 'multinational settings',\n 'child abuse',\n 'competition',\n 'swedish',\n 'Charlton Heston',\n 'beautiful effects',\n 'Robert Wise',\n 'Michael Mann',\n 'video game adaptation',\n 'Joseph L. Mankiewicz',\n 'angelina jolie',\n 'drugs',\n 'gritty',\n 'Kurosawa',\n 'Want',\n 'Gene Wilder',\n 'crazy',\n 'penguins',\n 'good acting',\n 'visually stunning',\n 'Hayden Christensen',\n 'climate change',\n 'bright',\n 'government',\n 'United States',\n 'atmospheric',\n 'B-movie',\n 'Israel',\n 'affair',\n 'robbery',\n 'Sylvester Stallone',\n 'ambitious',\n 'life philosophy',\n 'CAV',\n 'domestic violence',\n 'Glenn Close',\n 'everything',\n 'sandra bullock',\n 'office',\n 'Peter Cushing',\n 'Interesting',\n 'Drinking',\n 'disappointing',\n 'suicide',\n 'simple plot',\n 'voyeurism',\n 'revolution',\n 'Anton Yelchin',\n 'based on book',\n 'trilogy',\n 'Rick Moranis',\n 'mountain climbing',\n 'time-lapse',\n 'crime',\n 'corruption',\n ...}"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tag_counts = df_tag.tag.value_counts()\n",
    "top_tags = set(tag_counts[tag_counts >= 20].index)\n",
    "top_tags"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "<class 'pandas.core.frame.DataFrame'>\nInt64Index: 367091 entries, 1 to 465562\nData columns (total 3 columns):\nuserId     367091 non-null int64\nmovieId    367091 non-null int64\ntag        367091 non-null object\ndtypes: int64(2), object(1)\nmemory usage: 11.2+ MB\n"
    }
   ],
   "source": [
    "df_tag = df_tag[df_tag.tag.isin(top_tags)]\n",
    "df_tag.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "762"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tag2index = dict(((tag, index) for (index,tag) in enumerate(top_tags)))\n",
    "tag2index['lovecraft']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>tag</th>\n      <th>tag_index</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>65</td>\n      <td>208</td>\n      <td>dark hero</td>\n      <td>1083</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>65</td>\n      <td>353</td>\n      <td>dark hero</td>\n      <td>1083</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>65</td>\n      <td>521</td>\n      <td>noir thriller</td>\n      <td>2224</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>65</td>\n      <td>592</td>\n      <td>dark hero</td>\n      <td>1083</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>65</td>\n      <td>668</td>\n      <td>bollywood</td>\n      <td>1799</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>465557</th>\n      <td>138446</td>\n      <td>7164</td>\n      <td>Peter Pan</td>\n      <td>1413</td>\n    </tr>\n    <tr>\n      <th>465558</th>\n      <td>138446</td>\n      <td>7164</td>\n      <td>visually appealing</td>\n      <td>526</td>\n    </tr>\n    <tr>\n      <th>465560</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>Jason Bateman</td>\n      <td>219</td>\n    </tr>\n    <tr>\n      <th>465561</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>quirky</td>\n      <td>2428</td>\n    </tr>\n    <tr>\n      <th>465562</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>sad</td>\n      <td>1129</td>\n    </tr>\n  </tbody>\n</table>\n<p>367091 rows × 4 columns</p>\n</div>",
      "text/plain": "        userId  movieId                 tag  tag_index\n1           65      208           dark hero       1083\n2           65      353           dark hero       1083\n3           65      521       noir thriller       2224\n4           65      592           dark hero       1083\n5           65      668           bollywood       1799\n...        ...      ...                 ...        ...\n465557  138446     7164           Peter Pan       1413\n465558  138446     7164  visually appealing        526\n465560  138446    55999       Jason Bateman        219\n465561  138446    55999              quirky       2428\n465562  138446    55999                 sad       1129\n\n[367091 rows x 4 columns]"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tag['tag_index'] = df_tag.tag.apply(lambda t: tag2index[t])\n",
    "df_tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "(6731, 16934, 2952)\n"
    }
   ],
   "source": [
    "user_count = len(df_tag.userId.unique())\n",
    "movie_count = len(df_tag.movieId.unique())\n",
    "tag_count = len(top_tags)\n",
    "print((user_count, movie_count,tag_count))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_tag_matrix = np.zeros((user_count, tag_count), dtype = 'i1' )\n",
    "moive_tag_matrix = np.zeros((movie_count, tag_count), dtype = 'i2' )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "uid2index = dict((uid, i) for (i, uid) in enumerate(df_tag.userId.unique()))\n",
    "moive2index = dict((uid, i) for (i, uid) in enumerate(df_tag.movieId.unique()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "(0, 1)\n"
    }
   ],
   "source": [
    "print((uid2index[65],moive2index[353]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "for (uid, tag_i), group_uid_tag in df_tag.groupby(['userId', 'tag_index']):\n",
    "    # print((uid2index[uid], tag_i, len(group_uid_tag)))\n",
    "    user_tag_matrix[uid2index[uid], tag_i] =  len(group_uid_tag)\n",
    "    # break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0],\n       ...,\n       [0, 0, 0, ..., 0, 0, 0],\n       [0, 1, 0, ..., 0, 0, 0],\n       [0, 0, 0, ..., 0, 0, 0]], dtype=int8)"
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# should be 5\n",
    "user_tag_matrix[0,25]\n",
    "user_tag_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "for (movieId, tag_i), group_movieId_tag in df_tag.groupby(['movieId', 'tag_index']):\n",
    "    # print((moive2index[movieId], tag_i, len(group_movieId_tag)))\n",
    "    moive_tag_matrix[moive2index[movieId], tag_i] =  len(group_movieId_tag)\n",
    "    # break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "0"
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# should be 1\n",
    "user_tag_matrix[942, 43]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_tag['user_index'] = df_tag.userId.apply(lambda i: uid2index[i])\n",
    "df_tag['movie_index'] = df_tag.movieId.apply(lambda i: moive2index[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>tag</th>\n      <th>tag_index</th>\n      <th>user_index</th>\n      <th>movie_index</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>65</td>\n      <td>208</td>\n      <td>dark hero</td>\n      <td>1083</td>\n      <td>0</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>65</td>\n      <td>353</td>\n      <td>dark hero</td>\n      <td>1083</td>\n      <td>0</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>65</td>\n      <td>521</td>\n      <td>noir thriller</td>\n      <td>2224</td>\n      <td>0</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>65</td>\n      <td>592</td>\n      <td>dark hero</td>\n      <td>1083</td>\n      <td>0</td>\n      <td>3</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>65</td>\n      <td>668</td>\n      <td>bollywood</td>\n      <td>1799</td>\n      <td>0</td>\n      <td>4</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>465557</th>\n      <td>138446</td>\n      <td>7164</td>\n      <td>Peter Pan</td>\n      <td>1413</td>\n      <td>6730</td>\n      <td>1986</td>\n    </tr>\n    <tr>\n      <th>465558</th>\n      <td>138446</td>\n      <td>7164</td>\n      <td>visually appealing</td>\n      <td>526</td>\n      <td>6730</td>\n      <td>1986</td>\n    </tr>\n    <tr>\n      <th>465560</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>Jason 
Bateman</td>\n      <td>219</td>\n      <td>6730</td>\n      <td>4222</td>\n    </tr>\n    <tr>\n      <th>465561</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>quirky</td>\n      <td>2428</td>\n      <td>6730</td>\n      <td>4222</td>\n    </tr>\n    <tr>\n      <th>465562</th>\n      <td>138446</td>\n      <td>55999</td>\n      <td>sad</td>\n      <td>1129</td>\n      <td>6730</td>\n      <td>4222</td>\n    </tr>\n  </tbody>\n</table>\n<p>367091 rows × 6 columns</p>\n</div>",
      "text/plain": "        userId  movieId                 tag  tag_index  user_index  \\\n1           65      208           dark hero       1083           0   \n2           65      353           dark hero       1083           0   \n3           65      521       noir thriller       2224           0   \n4           65      592           dark hero       1083           0   \n5           65      668           bollywood       1799           0   \n...        ...      ...                 ...        ...         ...   \n465557  138446     7164           Peter Pan       1413        6730   \n465558  138446     7164  visually appealing        526        6730   \n465560  138446    55999       Jason Bateman        219        6730   \n465561  138446    55999              quirky       2428        6730   \n465562  138446    55999                 sad       1129        6730   \n\n        movie_index  \n1                 0  \n2                 1  \n3                 2  \n4                 3  \n5                 4  \n...             ...  \n465557         1986  \n465558         1986  \n465560         4222  \n465561         4222  \n465562         4222  \n\n[367091 rows x 6 columns]"
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_movie_matrix = np.zeros((user_count, movie_count), dtype = bool)\n",
    "for (uix, mix), grouped in df_tag.groupby(['user_index', 'movie_index']):\n",
    "    # print ((uix, mix))\n",
    "    user_movie_matrix[uix, mix] = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([ 24,  31,  25, ...,  66, 459,  11])"
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = user_tag_matrix.astype(bool).astype('i2').sum(axis = 0)\n",
    "a"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       ...,\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.28853901, 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ]])"
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_tag_punished_matrix = user_tag_matrix  / np.log(1 + a)\n",
    "user_tag_punished_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[58],\n       [87],\n       [29],\n       ...,\n       [ 1],\n       [ 1],\n       [ 1]])"
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    " user_movie_matrix.astype('i2').sum(axis = 0).reshape((movie_count, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[0.        , 0.        , 0.        , ..., 0.        , 0.98098425,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       ...,\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ]])"
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "movie_tag_punished_matrix = moive_tag_matrix/np.log( 1 + user_movie_matrix.astype('i2').sum(axis = 0).reshape((movie_count, 1)))\n",
    "movie_tag_punished_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "array([[2.03865201, 7.01386979, 5.8117528 , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 0.55238004, 0.76444404, ..., 0.        , 0.        ,\n        0.        ],\n       ...,\n       [1.69351593, 2.31075095, 0.        , ..., 0.        , 0.        ,\n        0.        ],\n       [0.        , 4.29787296, 0.69172319, ..., 0.        , 0.2254706 ,\n        0.        ],\n       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,\n        0.        ]])"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_movie_intrest_matrix = np.dot(user_tag_punished_matrix, movie_tag_punished_matrix.T)\n",
    "user_movie_intrest_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "[2.03865201 7.01386979 5.8117528  ... 0.         0.         0.        ]\n"
    }
   ],
   "source": [
    "for l in user_movie_intrest_matrix: \n",
    "    print(l)\n",
    "    break "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>userId</th>\n      <th>movieId</th>\n      <th>rating</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>7399</th>\n      <td>65</td>\n      <td>24</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>7400</th>\n      <td>65</td>\n      <td>318</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>7401</th>\n      <td>65</td>\n      <td>356</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>7402</th>\n      <td>65</td>\n      <td>364</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>7403</th>\n      <td>65</td>\n      <td>443</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>...</th>\n      <td>...</td>\n      <td>...</td>\n      <td>...</td>\n    </tr>\n    <tr>\n      <th>19994202</th>\n      <td>138446</td>\n      <td>88140</td>\n      <td>4.5</td>\n    </tr>\n    <tr>\n      <th>19994203</th>\n      <td>138446</td>\n      <td>89745</td>\n      <td>5.0</td>\n    </tr>\n    <tr>\n      <th>19994204</th>\n      <td>138446</td>\n      <td>90866</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>19994205</th>\n      <td>138446</td>\n      <td>91500</td>\n      <td>4.0</td>\n    </tr>\n    <tr>\n      <th>19994206</th>\n      <td>138446</td>\n      <td>91542</td>\n      <td>3.5</td>\n    </tr>\n  </tbody>\n</table>\n<p>2521621 rows × 3 columns</p>\n</div>",
      "text/plain": "          userId  movieId  rating\n7399          65       24     4.0\n7400          65      318     5.0\n7401          65      356     5.0\n7402          65      364     4.5\n7403          65      443     4.0\n...          ...      ...     ...\n19994202  138446    88140     4.5\n19994203  138446    89745     5.0\n19994204  138446    90866     4.0\n19994205  138446    91500     4.0\n19994206  138446    91542     3.5\n\n[2521621 rows x 3 columns]"
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_rating = df_rating[df_rating.userId.isin(uid2index) & df_rating.movieId.isin(moive2index)]\n",
    "df_rating"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "<class 'pandas.core.frame.DataFrame'>\nInt64Index: 2521621 entries, 7399 to 19994206\nData columns (total 3 columns):\nuserId     int64\nmovieId    int64\nrating     float64\ndtypes: float64(1), int64(2)\nmemory usage: 77.0 MB\n"
    }
   ],
   "source": [
    "df_rating.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "(65, <bound method IndexOpsMixin.tolist of 7399       24\n7400      318\n7401      356\n7402      364\n7403      443\n        ...  \n7472    70286\n7473    71462\n7474    72998\n7475    76093\n7476    79132\nName: movieId, Length: 63, dtype: int64>)\n"
    }
   ],
   "source": [
    "for uid, grouped in df_rating[df_rating.rating>= 3].groupby('userId'):\n",
    "    print( (uid, grouped.movieId.to_list))\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": ", 3398, 3399, 3400, 3401, 3402, 3403, 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438, 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, 3476, 3477, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 3493, 3494, 3495, 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3514, 3515, 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, 3532, 3533, 3534, 3535, 3536, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, 3548, 3549, 3550, 3551, 3552, 3553, 3554, 3555, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3644, 3645, 3646, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, 3686, 3687, 3688, 3689, 3690, 3691, 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, 3700, 3701, 3702, 3703, 3704, 3705, 3706, 3707, 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, 3724, 3725, 3726, 
3727, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, 3740, 3741, 3742, 3743, 3744, 3745, 3746, 3747, 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, 3781, 3782, 3783, 3784, 3785, 3786, 3787, 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 3799, 3800, 3801, 3802, 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, 3916, 3917, 3918, 3919, 3920, 3921, 3922, 3923, 3924, 3925, 3926, 3927, 3928, 3929, 3930, 3931, 3932, 3933, 3934, 3935, 3936, 3937, 3938, 3939, 3940, 3941, 3942, 3943, 3944, 3945, 3946, 3947, 3948, 3949, 3950, 3951, 3952, 3953, 3954, 3955, 3956, 3957, 3958, 3959, 3960, 3961, 3962, 3963, 3964, 3965, 3966, 3967, 3968, 3969, 3970, 3971, 3972, 3973, 3974, 3975, 3976, 3977, 3978, 3979, 3980, 3981, 3982, 3983, 3984, 3985, 3986, 3987, 3988, 3989, 3990, 3991, 3992, 3993, 3994, 3995, 3996, 3997, 3998, 3999, 4000, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008, 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4017, 4018, 4019, 4020, 4021, 4022, 4023, 4024, 4025, 4026, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036, 4037, 4038, 4039, 4040, 4041, 4042, 4043, 4044, 4045, 4046, 4047, 4048, 4049, 4050, 4051, 4052, 4053, 4054, 4055, 4056, 4057, 4058, 4059, 
4060, 4061, 4062, 4063, 4064, 4065, 4066, 4067, 4068, 4069, 4070, 4071, 4072, 4073, 4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, 4082, 4083, 4084, 4085, 4086, 4087, 4088, 4089, 4090, 4091, 4092, 4093, 4094, 4095, 4096, 4097, 4098, 4099, 4100, 4101, 4102, 4103, 4104, 4105, 4106, 4107, 4108, 4109, 4110, 4111, 4112, 4113, 4114, 4115, 4116, 4117, 4118, 4119, 4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132, 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145, 4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158, 4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171, 4172, 4173, 4174, 4175, 4176, 4177, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185, 4186, 4187, 4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200, 4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213, 4214, 4215, 4216, 4217, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227, 4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240, 4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253, 4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266, 4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279, 4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4290, 4291, 4292, 4293, 4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4320, 4321, 4322, 4323, 4324, 4325, 4326, 4327, 4328, 4329, 4330, 4331, 4332, 4333, 4334, 4335, 4336, 4337, 4338, 4339, 4340, 4341, 4342, 4343, 4344, 4345, 4346, 4347, 4348, 4349, 4350, 4351, 4352, 4353, 4354, 4355, 4356, 4357, 4358, 4359, 4360, 4361, 4362, 4363, 4364, 4365, 4366, 4367, 4368, 4369, 4370, 4371, 4372, 4373, 4374, 4375, 4376, 4377, 4378, 4379, 4380, 4381, 4382, 4383, 4384, 4385, 4386, 4387, 4388, 4389, 4390, 4391, 4392, 
4393, 4394, 4395, 4396, 4397, 4398, 4399, 4400, 4401, 4402, 4403, 4404, 4405, 4406, 4407, 4408, 4409, 4410, 4411, 4412, 4413, 4414, 4415, 4416, 4417, 4418, 4419, 4420, 4421, 4422, 4423, 4424, 4425, 4426, 4427, 4428, 4429, 4430, 4431, 4432, 4433, 4434, 4435, 4436, 4437, 4438, 4439, 4440, 4441, 4442, 4443, 4444, 4445, 4446, 4447, 4448, 4449, 4450, 4451, 4452, 4453, 4454, 4455, 4456, 4457, 4458, 4459, 4460, 4461, 4462, 4463, 4464, 4465, 4466, 4467, 4468, 4469, 4470, 4471, 4472, 4473, 4474, 4475, 4476, 4477, 4478, 4479, 4480, 4481, 4482, 4483, 4484, 4485, 4486, 4487, 4488, 4489, 4490, 4491, 4492, 4493, 4494, 4495, 4496, 4497, 4498, 4499, 4500, 4501, 4502, 4503, 4504, 4505, 4506, 4507, 4508, 4509, 4510, 4511, 4512, 4513, 4514, 4515, 4516, 4517, 4518, 4519, 4520, 4521, 4522, 4523, 4524, 4525, 4526, 4527, 4528, 4529, 4530, 4531, 4532, 4533, 4534, 4535, 4536, 4537, 4538, 4539, 4540, 4541, 4542, 4543, 4544, 4545, 4546, 4547, 4548, 4549, 4550, 4551, 4552, 4553, 4554, 4555, 4556, 4557, 4558, 4559, 4560, 4561, 4562, 4563, 4564, 4565, 4566, 4567, 4568, 4569, 4570, 4571, 4572, 4573, 4574, 4575, 4576, 4577, 4578, 4579, 4580, 4581, 4582, 4583, 4584, 4585, 4586, 4587, 4588, 4589, 4590, 4591, 4592, 4593, 4594, 4595, 4596, 4597, 4598, 4599, 4600, 4601, 4602, 4603, 4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616, 4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629, 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642, 4643, 4644, 4645, 4646, 4647, 4648, 4649, 4650, 4651, 4652, 4653, 4654, 4655, 4656, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665, 4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678, 4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691, 4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704, 4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717, 4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 
4726, 4727, 4728, 4729, 4730, 4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743, 4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756, 4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4766, 4767, 4768, 4769, 4770, 4771, 4772, 4773, 4774, 4775, 4776, 4777, 4778, 4779, 4780, 4781, 4782, 4783, 4784, 4785, 4786, 4787, 4788, 4789, 4790, 4791, 4792, 4793, 4794, 4795, 4796, 4797, 4798, 4799, 4800, 4801, 4802, 4803, 4804, 4805, 4806, 4807, 4808, 4809, 4810, 4811, 4812, 4813, 4814, 4815, 4816, 4817, 4818, 4819, 4820, 4821, 4822, 4823, 4824, 4825, 4826, 4827, 4828, 4829, 4830, 4831, 4832, 4833, 4834, 4835, 4836, 4837, 4838, 4839, 4840, 4841, 4842, 4843, 4844, 4845, 4846, 4847, 4848, 4849, 4850, 4851, 4852, 4853, 4854, 4855, 4856, 4857, 4858, 4859, 4860, 4861, 4862, 4863, 4864, 4865, 4866, 4867, 4868, 4869, 4870, 4871, 4872, 4873, 4874, 4875, 4876, 4877, 4878, 4879, 4880, 4881, 4882, 4883, 4884, 4885, 4886, 4887, 4888, 4889, 4890, 4891, 4892, 4893, 4894, 4895, 4896, 4897, 4898, 4899, 4900, 4901, 4902, 4903, 4904, 4905, 4906, 4907, 4908, 4909, 4910, 4911, 4912, 4913, 4914, 4915, 4916, 4917, 4918, 4919, 4920, 4921, 4922, 4923, 4924, 4925, 4926, 4927, 4928, 4929, 4930, 4931, 4932, 4933, 4934, 4935, 4936, 4937, 4938, 4939, 4940, 4941, 4942, 4943, 4944, 4945, 4946, 4947, 4948, 4949, 4950, 4951, 4952, 4953, 4954, 4955, 4956, 4957, 4958, 4959, 4960, 4961, 4962, 4963, 4964, 4965, 4966, 4967, 4968, 4969, 4970, 4971, 4972, 4973, 4974, 4975, 4976, 4977, 4978, 4979, 4980, 4981, 4982, 4983, 4984, 4985, 4986, 4987, 4988, 4989, 4990, 4991, 4992, 4993, 4994, 4995, 4996, 4997, 4998, 4999, 5000, 5001, 5002, 5003, 5004, 5005, 5006, 5007, 5008, 5009, 5010, 5011, 5012, 5013, 5014, 5015, 5016, 5017, 5018, 5019, 5020, 5021, 5022, 5023, 5024, 5025, 5026, 5027, 5028, 5029, 5030, 5031, 5032, 5033, 5034, 5035, 5036, 5037, 5038, 5039, 5040, 5041, 5042, 5043, 5044, 5045, 5046, 5047, 5048, 5049, 5050, 5051, 5052, 5053, 5054, 5055, 5056, 5057, 5058, 
5059, 5060, 5061, 5062, 5063, 5064, 5065, 5066, 5067, 5068, 5069, 5070, 5071, 5072, 5073, 5074, 5075, 5076, 5077, 5078, 5079, 5080, 5081, 5082, 5083, 5084, 5085, 5086, 5087, 5088, 5089, 5090, 5091, 5092, 5093, 5094, 5095, 5096, 5097, 5098, 5099, 5100, 5101, 5102, 5103, 5104, 5105, 5106, 5107, 5108, 5109, 5110, 5111, 5112, 5113, 5114, 5115, 5116, 5117, 5118, 5119, 5120, 5121, 5122, 5123, 5124, 5125, 5126, 5127, 5128, 5129, 5130, 5131, 5132, 5133, 5134, 5135, 5136, 5137, 5138, 5139, 5140, 5141, 5142, 5143, 5144, 5145, 5146, 5147, 5148, 5149, 5150, 5151, 5152, 5153, 5154, 5155, 5156, 5157, 5158, 5159, 5160, 5161, 5162, 5163, 5164, 5165, 5166, 5167, 5168, 5169, 5170, 5171, 5172, 5173, 5174, 5175, 5176, 5177, 5178, 5179, 5180, 5181, 5182, 5183, 5184, 5185, 5186, 5187, 5188, 5189, 5190, 5191, 5192, 5193, 5194, 5195, 5196, 5197, 5198, 5199, 5200, 5201, 5202, 5203, 5204, 5205, 5206, 5207, 5208, 5209, 5210, 5211, 5212, 5213, 5214, 5215, 5216, 5217, 5218, 5219, 5220, 5221, 5222, 5223, 5224, 5225, 5226, 5227, 5228, 5229, 5230, 5231, 5232, 5233, 5234, 5235, 5236, 5237, 5238, 5239, 5240, 5241, 5242, 5243, 5244, 5245, 5246, 5247, 5248, 5249, 5250, 5251, 5252, 5253, 5254, 5255, 5256, 5257, 5258, 5259, 5260, 5261, 5262, 5263, 5264, 5265, 5266, 5267, 5268, 5269, 5270, 5271, 5272, 5273, 5274, 5275, 5276, 5277, 5278, 5279, 5280, 5281, 5282, 5283, 5284, 5285, 5286, 5287, 5288, 5289, 5290, 5291, 5292, 5293, 5294, 5295, 5296, 5297, 5298, 5299, 5300, 5301, 5302, 5303, 5304, 5305, 5306, 5307, 5308, 5309, 5310, 5311, 5312, 5313, 5314, 5315, 5316, 5317, 5318, 5319, 5320, 5321, 5322, 5323, 5324, 5325, 5326, 5327, 5328, 5329, 5330, 5331, 5332, 5333, 5334, 5335, 5336, 5337, 5338, 5339, 5340, 5341, 5342, 5343, 5344, 5345, 5346, 5347, 5348, 5349, 5350, 5351, 5352, 5353, 5354, 5355, 5356, 5357, 5358, 5359, 5360, 5361, 5362, 5363, 5364, 5365, 5366, 5367, 5368, 5369, 5370, 5371, 5372, 5373, 5374, 5375, 5376, 5377, 5378, 5379, 5380, 5381, 5382, 5383, 5384, 5385, 5386, 5387, 5388, 5389, 5390, 5391, 
5392, 5393, 5394, 5395, 5396, 5397, 5398, 5399, 5400, 5401, 5402, 5403, 5404, 5405, 5406, 5407, 5408, 5409, 5410, 5411, 5412, 5413, 5414, 5415, 5416, 5417, 5418, 5419, 5420, 5421, 5422, 5423, 5424, 5425, 5426, 5427, 5428, 5429, 5430, 5431, 5432, 5433, 5434, 5435, 5436, 5437, 5438, 5439, 5440, 5441, 5442, 5443, 5444, 5445, 5446, 5447, 5448, 5449, 5450, 5451, 5452, 5453, 5454, 5455, 5456, 5457, 5458, 5459, 5460, 5461, 5462, 5463, 5464, 5465, 5466, 5467, 5468, 5469, 5470, 5471, 5472, 5473, 5474, 5475, 5476, 5477, 5478, 5479, 5480, 5481, 5482, 5483, 5484, 5485, 5486, 5487, 5488, 5489, 5490, 5491, 5492, 5493, 5494, 5495, 5496, 5497, 5498, 5499, 5500, 5501, 5502, 5503, 5504, 5505, 5506, 5507, 5508, 5509, 5510, 5511, 5512, 5513, 5514, 5515, 5516, 5517, 5518, 5519, 5520, 5521, 5522, 5523, 5524, 5525, 5526, 5527, 5528, 5529, 5530, 5531, 5532, 5533, 5534, 5535, 5536, 5537, 5538, 5539, 5540, 5541, 5542, 5543, 5544, 5545, 5546, 5547, 5548, 5549, 5550, 5551, 5552, 5553, 5554, 5555, 5556, 5557, 5558, 5559, 5560, 5561, 5562, 5563, 5564, 5565, 5566, 5567, 5568, 5569, 5570, 5571, 5572, 5573, 5574, 5575, 5576, 5577, 5578, 5579, 5580, 5581, 5582, 5583, 5584, 5585, 5586, 5587, 5588, 5589, 5590, 5591, 5592, 5593, 5594, 5595, 5596, 5597, 5598, 5599, 5600, 5601, 5602, 5603, 5604, 5605, 5606, 5607, 5608, 5609, 5610, 5611, 5612, 5613, 5614, 5615, 5616, 5617, 5618, 5619, 5620, 5621, 5622, 5623, 5624, 5625, 5626, 5627, 5628, 5629, 5630, 5631, 5632, 5633, 5634, 5635, 5636, 5637, 5638, 5639, 5640, 5641, 5642, 5643, 5644, 5645, 5646, 5647, 5648, 5649, 5650, 5651, 5652, 5653, 5654, 5655, 5656, 5657, 5658, 5659, 5660, 5661, 5662, 5663, 5664, 5665, 5666, 5667, 5668, 5669, 5670, 5671, 5672, 5673, 5674, 5675, 5676, 5677, 5678, 5679, 5680, 5681, 5682, 5683, 5684, 5685, 5686, 5687, 5688, 5689, 5690, 5691, 5692, 5693, 5694, 5695, 5696, 5697, 5698, 5699, 5700, 5701, 5702, 5703, 5704, 5705, 5706, 5707, 5708, 5709, 5710, 5711, 5712, 5713, 5714, 5715, 5716, 5717, 5718, 5719, 5720, 5721, 5722, 5723, 5724, 
5725, 5726, 5727, 5728, 5729, 5730, 5731, 5732, 5733, 5734, 5735, 5736, 5737, 5738, 5739, 5740, 5741, 5742, 5743, 5744, 5745, 5746, 5747, 5748, 5749, 5750, 5751, 5752, 5753, 5754, 5755, 5756, 5757, 5758, 5759, 5760, 5761, 5762, 5763, 5764, 5765, 5766, 5767, 5768, 5769, 5770, 5771, 5772, 5773, 5774, 5775, 5776, 5777, 5778, 5779, 5780, 5781, 5782, 5783, 5784, 5785, 5786, 5787, 5788, 5789, 5790, 5791, 5792, 5793, 5794, 5795, 5796, 5797, 5798, 5799, 5800, 5801, 5802, 5803, 5804, 5805, 5806, 5807, 5808, 5809, 5810, 5811, 5812, 5813, 5814, 5815, 5816, 5817, 5818, 5819, 5820, 5821, 5822, 5823, 5824, 5825, 5826, 5827, 5828, 5829, 5830, 5831, 5832, 5833, 5834, 5835, 5836, 5837, 5838, 5839, 5840, 5841, 5842, 5843, 5844, 5845, 5846, 5847, 5848, 5849, 5850, 5851, 5852, 5853, 5854, 5855, 5856, 5857, 5858, 5859, 5860, 5861, 5862, 5863, 5864, 5865, 5866, 5867, 5868, 5869, 5870, 5871, 5872, 5873, 5874, 5875, 5876, 5877, 5878, 5879, 5880, 5881, 5882, 5883, 5884, 5885, 5886, 5887, 5888, 5889, 5890, 5891, 5892, 5893, 5894, 5895, 5896, 5897, 5898, 5899, 5900, 5901, 5902, 5903, 5904, 5905, 5906, 5907, 5908, 5909, 5910, 5911, 5912, 5913, 5914, 5915, 5916, 5917, 5918, 5919, 5920, 5921, 5922, 5923, 5924, 5925, 5926, 5927, 5928, 5929, 5930, 5931, 5932, 5933, 5934, 5935, 5936, 5937, 5938, 5939, 5940, 5941, 5942, 5943, 5944, 5945, 5946, 5947, 5948, 5949, 5950, 5951, 5952, 5953, 5954, 5955, 5956, 5957, 5958, 5959, 5960, 5961, 5962, 5963, 5964, 5965, 5966, 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5974, 5975, 5976, 5977, 5978, 5979, 5980, 5981, 5982, 5983, 5984, 5985, 5986, 5987, 5988, 5989, 5990, 5991, 5992, 5993, 5994, 5995, 5996, 5997, 5998, 5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6019, 6020, 6021, 6022, 6023, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6033, 6034, 6035, 6036, 6037, 6038, 6039, 6040, 6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 
6058, 6059, 6060, 6061, 6062, 6063, 6064, 6065, 6066, 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, 6079, 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, 6090, 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, 6101, 6102, 6103, 6104, 6105, 6106, 6107, 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, 6116, 6117, 6118, 6119, 6120, 6121, 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, 6130, 6131, 6132, 6133, 6134, 6135, 6136, 6137, 6138, 6139, 6140, 6141, 6142, 6143, 6144, 6145, 6146, 6147, 6148, 6149, 6150, 6151, 6152, 6153, 6154, 6155, 6156, 6157, 6158, 6159, 6160, 6161, 6162, 6163, 6164, 6165, 6166, 6167, 6168, 6169, 6170, 6171, 6172, 6173, 6174, 6175, 6176, 6177, 6178, 6179, 6180, 6181, 6182, 6183, 6184, 6185, 6186, 6187, 6188, 6189, 6190, 6191, 6192, 6193, 6194, 6195, 6196, 6197, 6198, 6199, 6200, 6201, 6202, 6203, 6204, 6205, 6206, 6207, 6208, 6209, 6210, 6211, 6212, 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, 6221, 6222, 6223, 6224, 6225, 6226, 6227, 6228, 6229, 6230, 6231, 6232, 6233, 6234, 6235, 6236, 6237, 6238, 6239, 6240, 6241, 6242, 6243, 6244, 6245, 6246, 6247, 6248, 6249, 6250, 6251, 6252, 6253, 6254, 6255, 6256, 6257, 6258, 6259, 6260, 6261, 6262, 6263, 6264, 6265, 6266, 6267, 6268, 6269, 6270, 6271, 6272, 6273, 6274, 6275, 6276, 6277, 6278, 6279, 6280, 6281, 6282, 6283, 6284, 6285, 6286, 6287, 6288, 6289, 6290, 6291, 6292, 6293, 6294, 6295, 6296, 6297, 6298, 6299, 6300, 6301, 6302, 6303, 6304, 6305, 6306, 6307, 6308, 6309, 6310, 6311, 6312, 6313, 6314, 6315, 6316, 6317, 6318, 6319, 6320, 6321, 6322, 6323, 6324, 6325, 6326, 6327, 6328, 6329, 6330, 6331, 6332, 6333, 6334, 6335, 6336, 6337, 6338, 6339, 6340, 6341, 6342, 6343, 6344, 6345, 6346, 6347, 6348, 6349, 6350, 6351, 6352, 6353, 6354, 6355, 6356, 6357, 6358, 6359, 6360, 6361, 6362, 6363, 6364, 6365, 6366, 6367, 6368, 6369, 6370, 6371, 6372, 6373, 6374, 6375, 6376, 6377, 6378, 6379, 6380, 6381, 6382, 6383, 6384, 6385, 6386, 6387, 6388, 6389, 6390, 
6391, 6392, 6393, 6394, 6395, 6396, 6397, 6398, 6399, 6400, 6401, 6402, 6403, 6404, 6405, 6406, 6407, 6408, 6409, 6410, 6411, 6412, 6413, 6414, 6415, 6416, 6417, 6418, 6419, 6420, 6421, 6422, 6423, 6424, 6425, 6426, 6427, 6428, 6429, 6430, 6431, 6432, 6433, 6434, 6435, 6436, 6437, 6438, 6439, 6440, 6441, 6442, 6443, 6444, 6445, 6446, 6447, 6448, 6449, 6450, 6451, 6452, 6453, 6454, 6455, 6456, 6457, 6458, 6459, 6460, 6461, 6462, 6463, 6464, 6465, 6466, 6467, 6468, 6469, 6470, 6471, 6472, 6473, 6474, 6475, 6476, 6477, 6478, 6479, 6480, 6481, 6482, 6483, 6484, 6485, 6486, 6487, 6488, 6489, 6490, 6491, 6492, 6493, 6494, 6495, 6496, 6497, 6498, 6499, 6500, 6501, 6502, 6503, 6504, 6505, 6506, 6507, 6508, 6509, 6510, 6511, 6512, 6513, 6514, 6515, 6516, 6517, 6518, 6519, 6520, 6521, 6522, 6523, 6524, 6525, 6526, 6527, 6528, 6529, 6530, 6531, 6532, 6533, 6534, 6535, 6536, 6537, 6538, 6539, 6540, 6541, 6542, 6543, 6544, 6545, 6546, 6547, 6548, 6549, 6550, 6551, 6552, 6553, 6554, 6555, 6556, 6557, 6558, 6559, 6560, 6561, 6562, 6563, 6564, 6565, 6566, 6567, 6568, 6569, 6570, 6571, 6572, 6573, 6574, 6575, 6576, 6577, 6578, 6579, 6580, 6581, 6582, 6583, 6584, 6585, 6586, 6587, 6588, 6589, 6590, 6591, 6592, 6593, 6594, 6595, 6596, 6597, 6598, 6599, 6600, 6601, 6602, 6603, 6604, 6605, 6606, 6607, 6608, 6609, 6610, 6611, 6612, 6613, 6614, 6615, 6616, 6617, 6618, 6619, 6620, 6621, 6622, 6623, 6624, 6625, 6626, 6627, 6628, 6629, 6630, 6631, 6632, 6633, 6634, 6635, 6636, 6637, 6638, 6639, 6640, 6641, 6642, 6643, 6644, 6645, 6646, 6647, 6648, 6649, 6650, 6651, 6652, 6653, 6654, 6655, 6656, 6657, 6658, 6659, 6660, 6661, 6662, 6663, 6664, 6665, 6666, 6667, 6668, 6669, 6670, 6671, 6672, 6673, 6674, 6675, 6676, 6677, 6678, 6679, 6680, 6681, 6682, 6683, 6684, 6685, 6686, 6687, 6688, 6689, 6690, 6691, 6692, 6693, 6694, 6695, 6696, 6697, 6698, 6699, 6700, 6701, 6702, 6703, 6704, 6705, 6706, 6707, 6708, 6709, 6710, 6711, 6712, 6713, 6714, 6715, 6716, 6717, 6718, 6719, 6720, 6721, 6722, 6723, 
6724, 6725, 6726, 6727, 6728, 6729, 6730])"
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build {user index: set of movie indices} from the ratings that are 3 or higher.\n",
    "# uid2index / moive2index translate raw ids into the index values used as keys and set members.\n",
    "user_fav = {\n",
    "    uid2index[user_id]: {moive2index[movie_id] for movie_id in user_rows.movieId}\n",
    "    for user_id, user_rows in df_rating[df_rating.rating >= 3].groupby('userId')\n",
    "}\n",
    "user_fav.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "{0,\n 1,\n 2,\n 3,\n 4,\n 5,\n 6,\n 7,\n 8,\n 9,\n 10,\n 11,\n 12,\n 13,\n 14,\n 15,\n 16,\n 17,\n 18,\n 19,\n 20,\n 21,\n 22,\n 23,\n 24,\n 27,\n 28,\n 29,\n 52,\n 66,\n 81,\n 96,\n 101,\n 105,\n 125,\n 131,\n 148,\n 227,\n 344,\n 345,\n 388,\n 419,\n 428,\n 432,\n 438,\n 447,\n 452,\n 457,\n 460,\n 463,\n 464,\n 465,\n 466,\n 469,\n 472,\n 474,\n 475,\n 481,\n 483,\n 486,\n 488,\n 489,\n 527,\n 535,\n 595,\n 596,\n 625,\n 626,\n 630,\n 637,\n 638,\n 644,\n 684,\n 743,\n 744,\n 777,\n 824,\n 933,\n 951,\n 1153,\n 1235,\n 1243,\n 1247,\n 1296,\n 1326,\n 1349,\n 1352,\n 1636,\n 1757,\n 1877,\n 1971,\n 2234,\n 2252,\n 2636,\n 2638,\n 2657,\n 2677,\n 2682,\n 3185,\n 4330,\n 4404,\n 4416,\n 4420,\n 5256,\n 6409,\n 6642,\n 8227,\n 8615,\n 8627,\n 8638,\n 8822,\n 10329,\n 10553,\n 11853,\n 11996,\n 14184,\n 16330,\n 16331,\n 16332,\n 16333,\n 16517}"
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Positions in row 0 of the interest matrix whose value is at least 1, shown as a set.\n",
    "set(np.nonzero(user_movie_intrest_matrix[0] >= 1)[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": "2234,\n  2235,\n  4282,\n  2237,\n  4283,\n  4285,\n  192,\n  4286,\n  4288,\n  8382,\n  4297,\n  10441,\n  14538,\n  2252,\n  6351,\n  4112,\n  8401,\n  2259,\n  4308,\n  6148,\n  8402,\n  2263,\n  2264,\n  10457,\n  4315,\n  222,\n  4319,\n  224,\n  6367,\n  6369,\n  227,\n  6371,\n  8421,\n  14564,\n  2280,\n  4330,\n  237,\n  2286,\n  6153,\n  6383,\n  2289,\n  2290,\n  2293,\n  2295,\n  6155,\n  4346,\n  251,\n  2302,\n  2303,\n  6399,\n  12547,\n  16644,\n  10505,\n  2316,\n  4364,\n  4366,\n  2320,\n  4368,\n  4369,\n  277,\n  2326,\n  10517,\n  6427,\n  2332,\n  2336,\n  6433,\n  6977,\n  10530,\n  2347,\n  300,\n  10544,\n  305,\n  306,\n  2356,\n  4406,\n  6455,\n  14648,\n  2361,\n  2362,\n  2363,\n  6459,\n  2365,\n  4413,\n  16694,\n  320,\n  2372,\n  325,\n  14660,\n  14661,\n  14663,\n  14669,\n  6478,\n  335,\n  336,\n  337,\n  2383,\n  2384,\n  340,\n  341,\n  2387,\n  10243,\n  344,\n  345,\n  4441,\n  347,\n  10582,\n  10586,\n  2398,\n  351,\n  8211,\n  8542,\n  355,\n  10595,\n  357,\n  8550,\n  8551,\n  8552,\n  8554,\n  4459,\n  367,\n  368,\n  371,\n  374,\n  2423,\n  376,\n  4472,\n  378,\n  4473,\n  380,\n  6525,\n  382,\n  8574,\n  14720,\n  386,\n  388,\n  2436,\n  390,\n  391,\n  392,\n  393,\n  2441,\n  396,\n  2445,\n  14732,\n  400,\n  401,\n  402,\n  403,\n  404,\n  405,\n  406,\n  407,\n  408,\n  409,\n  410,\n  411,\n  6548,\n  413,\n  414,\n  415,\n  416,\n  8608,\n  418,\n  419,\n  2468,\n  2470,\n  423,\n  424,\n  425,\n  4521,\n  8615,\n  428,\n  429,\n  10668,\n  2479,\n  10672,\n  16808,\n  434,\n  435,\n  436,\n  437,\n  438,\n  2483,\n  2486,\n  441,\n  442,\n  2490,\n  444,\n  6582,\n  10684,\n  447,\n  448,\n  449,\n  450,\n  452,\n  454,\n  456,\n  457,\n  6602,\n  459,\n  460,\n  6603,\n  462,\n  14338,\n  466,\n  2514,\n  2515,\n  16850,\n  470,\n  472,\n  475,\n  476,\n  477,\n  2523,\n  12763,\n  481,\n  482,\n  483,\n  484,\n  485,\n  486,\n  487,\n  488,\n  489,\n  490,\n  6632,\n  2540,\n  
4588,\n  4590,\n  495,\n  12777,\n  497,\n  6642,\n  10738,\n  4596,\n  14346,\n  502,\n  16880,\n  504,\n  2552,\n  506,\n  4603,\n  16888,\n  6653,\n  512,\n  513,\n  515,\n  516,\n  518,\n  522,\n  524,\n  4622,\n  527,\n  529,\n  530,\n  12724,\n  532,\n  2581,\n  10773,\n  535,\n  537,\n  2585,\n  539,\n  2587,\n  542,\n  16928,\n  545,\n  16930,\n  548,\n  549,\n  550,\n  551,\n  4645,\n  554,\n  555,\n  556,\n  557,\n  562,\n  563,\n  2611,\n  565,\n  2613,\n  567,\n  568,\n  2614,\n  6706,\n  573,\n  575,\n  577,\n  2626,\n  579,\n  580,\n  581,\n  582,\n  583,\n  584,\n  4673,\n  586,\n  587,\n  588,\n  2634,\n  590,\n  2635,\n  592,\n  593,\n  2636,\n  595,\n  2638,\n  2640,\n  2645,\n  599,\n  2647,\n  2648,\n  2650,\n  2651,\n  2652,\n  2653,\n  2654,\n  2655,\n  608,\n  2657,\n  2658,\n  2659,\n  612,\n  615,\n  2664,\n  2665,\n  618,\n  619,\n  2666,\n  621,\n  2667,\n  6763,\n  624,\n  16405,\n  627,\n  628,\n  630,\n  8822,\n  2680,\n  2682,\n  635,\n  2684,\n  637,\n  2685,\n  14373,\n  14374,\n  16409,\n  2690,\n  16410,\n  644,\n  646,\n  647,\n  648,\n  2694,\n  8839,\n  651,\n  652,\n  8843,\n  654,\n  8845,\n  656,\n  657,\n  2708,\n  661,\n  2709,\n  663,\n  664,\n  2712,\n  672,\n  674,\n  2723,\n  2724,\n  16416,\n  2726,\n  682,\n  683,\n  684,\n  685,\n  2730,\n  4780,\n  690,\n  692,\n  2741,\n  694,\n  2742,\n  696,\n  697,\n  2747,\n  702,\n  2751,\n  2752,\n  16422,\n  2755,\n  10724,\n  709,\n  2757,\n  711,\n  712,\n  713,\n  2758,\n  715,\n  2759,\n  2768,\n  2769,\n  722,\n  4819,\n  726,\n  727,\n  729,\n  731,\n  732,\n  4829,\n  734,\n  735,\n  736,\n  737,\n  2786,\n  741,\n  742,\n  743,\n  744,\n  2789,\n  4841,\n  748,\n  749,\n  4844,\n  752,\n  8944,\n  754,\n  757,\n  2807,\n  760,\n  4857,\n  762,\n  2811,\n  764,\n  2812,\n  8951,\n  2815,\n  2817,\n  2818,\n  2820,\n  4869,\n  8965,\n  13062,\n  777,\n  778,\n  2825,\n  780,\n  781,\n  782,\n  783,\n  784,\n  2826,\n  2827,\n  787,\n  2832,\n  789,\n  4883,\n  791,\n  
792,\n  793,\n  2842,\n  795,\n  796,\n  797,\n  798,\n  799,\n  7079,\n  803,\n  805,\n  806,\n  807,\n  808,\n  809,\n  810,\n  2854,\n  812,\n  813,\n  814,\n  2855,\n  2858,\n  817,\n  818,\n  2859,\n  820,\n  821,\n  822,\n  2862,\n  824,\n  2865,\n  2866,\n  2867,\n  2868,\n  2871,\n  2872,\n  2874,\n  832,\n  2879,\n  834,\n  2883,\n  836,\n  4927,\n  838,\n  2886,\n  6976,\n  841,\n  842,\n  15178,\n  9036,\n  9038,\n  848,\n  849,\n  850,\n  4948,\n  853,\n  2901,\n  6996,\n  4952,\n  6997,\n  860,\n  862,\n  863,\n  864,\n  865,\n  2911,\n  4958,\n  2916,\n  4967,\n  4968,\n  873,\n  874,\n  4971,\n  878,\n  879,\n  4974,\n  881,\n  882,\n  2931,\n  2932,\n  2933,\n  2934,\n  9078,\n  4984,\n  2940,\n  4990,\n  4991,\n  2946,\n  4994,\n  900,\n  901,\n  2948,\n  4995,\n  16869,\n  5003,\n  16870,\n  5007,\n  9103,\n  913,\n  2963,\n  5011,\n  2965,\n  919,\n  920,\n  7066,\n  5019,\n  924,\n  5022,\n  927,\n  5025,\n  930,\n  931,\n  932,\n  933,\n  934,\n  5027,\n  2984,\n  937,\n  2986,\n  5032,\n  940,\n  941,\n  942,\n  943,\n  944,\n  9130,\n  946,\n  947,\n  948,\n  16876,\n  951,\n  954,\n  956,\n  957,\n  958,\n  959,\n  961,\n  962,\n  963,\n  964,\n  965,\n  3010,\n  3011,\n  9153,\n  970,\n  972,\n  3021,\n  975,\n  9168,\n  977,\n  978,\n  3025,\n  7123,\n  3033,\n  3034,\n  987,\n  3035,\n  3037,\n  3038,\n  991,\n  992,\n  993,\n  3040,\n  995,\n  997,\n  998,\n  999,\n  1000,\n  3047,\n  1002,\n  1003,\n  3051,\n  1005,\n  1011,\n  1012,\n  7156,\n  1014,\n  1017,\n  5114,\n  1019,\n  5118,\n  5123,\n  5125,\n  3080,\n  1036,\n  1038,\n  1039,\n  5137,\n  1042,\n  3090,\n  1044,\n  3093,\n  16489,\n  1047,\n  3096,\n  3097,\n  5143,\n  3099,\n  1052,\n  3101,\n  5148,\n  16491,\n  1056,\n  3108,\n  1061,\n  1062,\n  5165,\n  1070,\n  5166,\n  1072,\n  9261,\n  7220,\n  1077,\n  5173,\n  1080,\n  5177,\n  1082,\n  1085,\n  5182,\n  1087,\n  7232,\n  5185,\n  1091,\n  1092,\n  5188,\n  5190,\n  5191,\n  7235,\n  1099,\n  1100,\n  1101,\n  
5198,\n  5199,\n  5200,\n  1105,\n  7250,\n  5203,\n  1111,\n  5209,\n  1114,\n  1115,\n  5212,\n  5215,\n  1120,\n  5217,\n  5220,\n  3175,\n  5224,\n  1131,\n  1133,\n  5229,\n  1141,\n  1143,\n  5239,\n  5244,\n  5245,\n  1151,\n  5247,\n  1153,\n  1154,\n  3202,\n  5248,\n  5251,\n  1159,\n  1160,\n  3207,\n  5256,\n  1163,\n  3211,\n  8779,\n  7314,\n  7316,\n  9367,\n  7320,\n  1177,\n  1187,\n  1191,\n  1194,\n  1197,\n  3245,\n  3248,\n  1206,\n  1213,\n  1214,\n  3263,\n  3264,\n  7361,\n  7362,\n  1219,\n  1220,\n  1225,\n  9427,\n  7383,\n  5336,\n  1250,\n  3305,\n  7403,\n  1261,\n  3309,\n  3312,\n  1265,\n  7410,\n  7414,\n  1271,\n  1276,\n  3325,\n  1278,\n  7422,\n  1280,\n  7424,\n  1283,\n  3331,\n  3333,\n  3336,\n  7432,\n  1290,\n  3339,\n  7433,\n  1293,\n  3341,\n  7435,\n  3346,\n  1301,\n  1302,\n  1309,\n  3357,\n  1313,\n  1314,\n  3362,\n  3365,\n  1322,\n  1324,\n  1328,\n  9523,\n  3382,\n  7481,\n  1339,\n  3395,\n  1348,\n  1349,\n  1351,\n  1352,\n  3402,\n  3403,\n  1362,\n  3411,\n  1365,\n  1366,\n  1367,\n  3416,\n  9565,\n  3425,\n  9569,\n  3428,\n  1382,\n  1386,\n  3440,\n  1405,\n  1408,\n  3459,\n  5508,\n  3474,\n  1433,\n  1434,\n  3481,\n  1436,\n  1440,\n  1442,\n  9637,\n  15782,\n  1448,\n  1449,\n  1450,\n  3496,\n  3498,\n  1453,\n  3505,\n  9650,\n  9651,\n  3509,\n  3510,\n  1463,\n  1464,\n  3525,\n  5574,\n  1482,\n  1484,\n  5581,\n  7628,\n  1489,\n  5588,\n  1493,\n  7638,\n  11733,\n  ...},\n 993: {0,\n  3,\n  8195,\n  6,\n  7,\n  8,\n  13,\n  16,\n  19,\n  20,\n  8211,\n  22,\n  24,\n  27,\n  31,\n  33,\n  36,\n  37,\n  39,\n  42,\n  46,\n  49,\n  50,\n  8243,\n  52,\n  55,\n  8247,\n  57,\n  60,\n  61,\n  65,\n  66,\n  67,\n  68,\n  8259,\n  8263,\n  73,\n  76,\n  77,\n  79,\n  80,\n  81,\n  83,\n  84,\n  85,\n  86,\n  87,\n  88,\n  89,\n  90,\n  91,\n  16471,\n  93,\n  94,\n  8287,\n  96,\n  97,\n  99,\n  100,\n  8293,\n  8295,\n  105,\n  107,\n  109,\n  8301,\n  8303,\n  115,\n  117,\n  118,\n  
8309,\n  120,\n  121,\n  122,\n  8313,\n  124,\n  16509,\n  16510,\n  127,\n  128,\n  136,\n  138,\n  8335,\n  147,\n  149,\n  151,\n  8344,\n  8367,\n  176,\n  184,\n  188,\n  192,\n  197,\n  8390,\n  222,\n  224,\n  233,\n  234,\n  237,\n  241,\n  251,\n  16642,\n  16643,\n  260,\n  16644,\n  16658,\n  16669,\n  290,\n  303,\n  305,\n  306,\n  16690,\n  330,\n  333,\n  335,\n  336,\n  337,\n  8531,\n  340,\n  342,\n  344,\n  345,\n  358,\n  359,\n  16748,\n  368,\n  374,\n  376,\n  379,\n  380,\n  382,\n  385,\n  386,\n  387,\n  389,\n  8581,\n  391,\n  392,\n  393,\n  8583,\n  395,\n  396,\n  400,\n  401,\n  402,\n  404,\n  409,\n  410,\n  411,\n  415,\n  416,\n  16801,\n  419,\n  420,\n  423,\n  424,\n  433,\n  438,\n  440,\n  446,\n  457,\n  464,\n  466,\n  478,\n  16863,\n  480,\n  483,\n  485,\n  486,\n  8677,\n  490,\n  494,\n  495,\n  496,\n  502,\n  506,\n  507,\n  513,\n  516,\n  16906,\n  523,\n  524,\n  534,\n  535,\n  537,\n  8733,\n  546,\n  549,\n  551,\n  552,\n  554,\n  555,\n  556,\n  561,\n  563,\n  568,\n  570,\n  572,\n  573,\n  574,\n  575,\n  580,\n  581,\n  582,\n  583,\n  584,\n  586,\n  587,\n  590,\n  591,\n  593,\n  596,\n  608,\n  612,\n  614,\n  615,\n  618,\n  621,\n  622,\n  624,\n  625,\n  626,\n  631,\n  635,\n  8831,\n  8832,\n  645,\n  646,\n  647,\n  654,\n  656,\n  660,\n  661,\n  662,\n  663,\n  664,\n  8859,\n  674,\n  676,\n  678,\n  679,\n  683,\n  684,\n  685,\n  8879,\n  687,\n  688,\n  689,\n  691,\n  692,\n  693,\n  694,\n  696,\n  700,\n  701,\n  8895,\n  706,\n  711,\n  723,\n  8916,\n  725,\n  727,\n  729,\n  730,\n  731,\n  732,\n  735,\n  736,\n  737,\n  8928,\n  739,\n  742,\n  743,\n  744,\n  8935,\n  749,\n  752,\n  753,\n  754,\n  8946,\n  757,\n  759,\n  760,\n  762,\n  770,\n  772,\n  8965,\n  775,\n  777,\n  778,\n  781,\n  785,\n  786,\n  8978,\n  789,\n  796,\n  799,\n  8993,\n  806,\n  807,\n  808,\n  811,\n  812,\n  813,\n  817,\n  818,\n  820,\n  821,\n  824,\n  9018,\n  826,\n  831,\n  832,\n  836,\n  
839,\n  841,\n  843,\n  847,\n  851,\n  853,\n  855,\n  857,\n  862,\n  864,\n  865,\n  868,\n  871,\n  873,\n  874,\n  878,\n  881,\n  882,\n  885,\n  887,\n  889,\n  890,\n  891,\n  9082,\n  895,\n  9089,\n  898,\n  900,\n  902,\n  903,\n  908,\n  909,\n  910,\n  911,\n  912,\n  915,\n  919,\n  921,\n  922,\n  9117,\n  927,\n  9119,\n  930,\n  931,\n  932,\n  934,\n  939,\n  940,\n  942,\n  944,\n  952,\n  956,\n  9148,\n  958,\n  959,\n  963,\n  964,\n  965,\n  970,\n  976,\n  978,\n  981,\n  984,\n  987,\n  991,\n  992,\n  993,\n  994,\n  9186,\n  999,\n  1000,\n  1019,\n  1033,\n  1034,\n  1035,\n  1037,\n  1041,\n  1042,\n  1044,\n  1045,\n  1052,\n  9244,\n  1057,\n  1064,\n  1067,\n  1073,\n  1083,\n  1094,\n  1100,\n  1103,\n  1107,\n  1111,\n  1112,\n  1113,\n  1114,\n  1115,\n  1120,\n  9314,\n  1125,\n  1131,\n  1141,\n  1146,\n  1149,\n  1154,\n  1160,\n  1163,\n  1165,\n  1167,\n  1179,\n  1194,\n  1197,\n  1205,\n  1207,\n  1224,\n  1230,\n  9425,\n  1235,\n  1237,\n  1243,\n  1244,\n  1247,\n  1248,\n  1249,\n  1257,\n  1261,\n  1265,\n  9470,\n  1287,\n  1288,\n  1291,\n  1299,\n  1300,\n  1301,\n  1306,\n  1309,\n  1311,\n  1313,\n  1315,\n  1316,\n  1317,\n  1318,\n  1319,\n  1322,\n  1327,\n  1333,\n  1336,\n  1338,\n  1339,\n  1346,\n  1347,\n  1349,\n  1350,\n  1351,\n  1352,\n  1353,\n  1361,\n  9553,\n  1365,\n  1366,\n  1367,\n  1373,\n  9567,\n  1377,\n  1382,\n  1389,\n  1392,\n  1401,\n  1405,\n  1408,\n  1409,\n  1411,\n  1412,\n  1413,\n  1414,\n  9607,\n  1417,\n  1418,\n  1419,\n  1420,\n  1422,\n  1423,\n  1424,\n  1425,\n  9617,\n  1433,\n  9629,\n  1449,\n  1451,\n  1459,\n  1462,\n  1464,\n  1469,\n  1483,\n  1484,\n  1489,\n  1490,\n  1492,\n  1496,\n  1497,\n  1503,\n  1505,\n  1507,\n  1509,\n  9706,\n  9710,\n  1520,\n  1524,\n  1526,\n  1559,\n  1568,\n  1571,\n  1572,\n  1576,\n  1577,\n  1578,\n  1583,\n  1588,\n  1592,\n  1605,\n  1607,\n  1610,\n  1620,\n  1627,\n  1634,\n  1636,\n  1639,\n  1641,\n  1644,\n  1648,\n  
1650,\n  1651,\n  9851,\n  1660,\n  1662,\n  1663,\n  1664,\n  1671,\n  1682,\n  1687,\n  9883,\n  1700,\n  1704,\n  9903,\n  1722,\n  1740,\n  1744,\n  1760,\n  1768,\n  1774,\n  1778,\n  1783,\n  1786,\n  1788,\n  1789,\n  1820,\n  10013,\n  1821,\n  1822,\n  1831,\n  1840,\n  1843,\n  1849,\n  1851,\n  1855,\n  1857,\n  1865,\n  10057,\n  10070,\n  10074,\n  10077,\n  1895,\n  1908,\n  1931,\n  1933,\n  10137,\n  1948,\n  10155,\n  10166,\n  1975,\n  10175,\n  10179,\n  2012,\n  10206,\n  2035,\n  2057,\n  2060,\n  2067,\n  10262,\n  10266,\n  2089,\n  10323,\n  2151,\n  2155,\n  2156,\n  2157,\n  10365,\n  2200,\n  2201,\n  2204,\n  2210,\n  2212,\n  2215,\n  10409,\n  10411,\n  2223,\n  2228,\n  10421,\n  2235,\n  2237,\n  2240,\n  2243,\n  2245,\n  10443,\n  2252,\n  10444,\n  2263,\n  2273,\n  2282,\n  2287,\n  2291,\n  2292,\n  2293,\n  2308,\n  10513,\n  10518,\n  2336,\n  2345,\n  2356,\n  2361,\n  2366,\n  2380,\n  2383,\n  2384,\n  2399,\n  2419,\n  2424,\n  2436,\n  10638,\n  10642,\n  10643,\n  10647,\n  2466,\n  2470,\n  10676,\n  2486,\n  2490,\n  10684,\n  2493,\n  10685,\n  2515,\n  2523,\n  2540,\n  10736,\n  2547,\n  2552,\n  2573,\n  10767,\n  10768,\n  2589,\n  2614,\n  2615,\n  2621,\n  2629,\n  2632,\n  2633,\n  2645,\n  2649,\n  2650,\n  2664,\n  2668,\n  2671,\n  2672,\n  2675,\n  2676,\n  2680,\n  2681,\n  2682,\n  2695,\n  2702,\n  2709,\n  2711,\n  2712,\n  2713,\n  2715,\n  2718,\n  2726,\n  2728,\n  2734,\n  2738,\n  2739,\n  2740,\n  2741,\n  2744,\n  2745,\n  2747,\n  2751,\n  2759,\n  2761,\n  2763,\n  2774,\n  2776,\n  2781,\n  2782,\n  2785,\n  2786,\n  2787,\n  2791,\n  2794,\n  2809,\n  2813,\n  2815,\n  2832,\n  2843,\n  2852,\n  2853,\n  2870,\n  2871,\n  2872,\n  2876,\n  2878,\n  2883,\n  2913,\n  2916,\n  2918,\n  2928,\n  2931,\n  2937,\n  2948,\n  2956,\n  2980,\n  2982,\n  2984,\n  2985,\n  2986,\n  3007,\n  3012,\n  3025,\n  3029,\n  3035,\n  3038,\n  3040,\n  3052,\n  3055,\n  3059,\n  3063,\n  3072,\n  3075,\n  
3077,\n  3092,\n  3096,\n  3105,\n  3106,\n  3145,\n  3192,\n  3202,\n  3204,\n  3206,\n  3208,\n  3211,\n  3213,\n  3216,\n  3232,\n  3255,\n  3258,\n  3264,\n  3266,\n  3271,\n  3280,\n  3287,\n  3290,\n  3298,\n  3299,\n  3303,\n  3305,\n  3308,\n  3322,\n  3346,\n  3352,\n  3356,\n  3359,\n  3368,\n  3370,\n  3383,\n  3384,\n  3393,\n  3396,\n  3401,\n  3429,\n  3434,\n  3443,\n  3449,\n  3463,\n  3475,\n  11711,\n  3525,\n  3528,\n  3531,\n  11728,\n  3575,\n  3578,\n  11771,\n  11774,\n  11776,\n  3588,\n  3590,\n  3596,\n  11790,\n  3603,\n  11806,\n  3630,\n  11822,\n  3639,\n  3642,\n  3663,\n  3667,\n  3669,\n  3681,\n  11873,\n  3684,\n  3687,\n  3692,\n  3694,\n  3700,\n  3708,\n  11901,\n  3713,\n  3719,\n  3721,\n  11914,\n  11917,\n  3726,\n  3729,\n  3738,\n  3745,\n  3752,\n  11944,\n  3757,\n  3763,\n  11961,\n  3780,\n  3781,\n  11975,\n  3793,\n  11988,\n  3809,\n  3816,\n  3822,\n  3826,\n  3830,\n  12028,\n  3862,\n  3871,\n  3872,\n  12068,\n  12082,\n  12083,\n  3905,\n  12100,\n  3911,\n  3919,\n  12117,\n  3929,\n  3931,\n  3934,\n  3939,\n  12133,\n  12134,\n  12135,\n  3950,\n  12143,\n  12154,\n  3965,\n  12157,\n  12161,\n  12163,\n  3973,\n  12165,\n  12166,\n  3978,\n  3980,\n  3982,\n  3983,\n  3986,\n  3990,\n  12189,\n  3998,\n  12205,\n  4031,\n  12224,\n  4052,\n  12254,\n  4067,\n  4078,\n  4084,\n  4089,\n  4097,\n  4098,\n  4099,\n  4110,\n  4136,\n  4147,\n  4148,\n  4150,\n  4160,\n  12369,\n  4191,\n  4200,\n  12394,\n  4223,\n  4242,\n  4243,\n  4271,\n  4272,\n  4282,\n  4283,\n  4290,\n  4293,\n  4299,\n  4304,\n  4307,\n  4319,\n  4326,\n  12519,\n  4333,\n  4336,\n  4338,\n  4356,\n  4359,\n  4364,\n  12560,\n  4369,\n  12566,\n  4392,\n  4395,\n  4398,\n  4402,\n  4404,\n  4406,\n  4407,\n  4413,\n  4419,\n  4422,\n  4423,\n  4424,\n  12625,\n  12627,\n  12629,\n  12632,\n  12649,\n  4461,\n  4463,\n  12672,\n  4486,\n  12682,\n  12683,\n  12684,\n  4493,\n  4495,\n  4501,\n  12703,\n  4512,\n  12711,\n  4558,\n  
4569,\n  4570,\n  12763,\n  4583,\n  12775,\n  4588,\n  4592,\n  4594,\n  4598,\n  4601,\n  4603,\n  4607,\n  4626,\n  4630,\n  12824,\n  4637,\n  4638,\n  4639,\n  4640,\n  4643,\n  4644,\n  4645,\n  4648,\n  4649,\n  ...},\n 994: {3,\n  66,\n  115,\n  124,\n  371,\n  419,\n  425,\n  428,\n  466,\n  486,\n  509,\n  562,\n  586,\n  678,\n  715,\n  729,\n  731,\n  853,\n  1105,\n  1111,\n  1822,\n  2070,\n  2235,\n  2290,\n  2335,\n  2436,\n  2441,\n  2483,\n  2486,\n  2658,\n  2664,\n  2820,\n  2933,\n  5025},\n 995: {19, 522, 2237, 2293, 2644, 2739, 3758, 3762, 5838},\n 996: set(),\n 997: set(),\n 998: {8,\n  17,\n  20,\n  22,\n  24,\n  31,\n  39,\n  45,\n  46,\n  50,\n  52,\n  56,\n  61,\n  62,\n  68,\n  69,\n  70,\n  77,\n  78,\n  80,\n  81,\n  84,\n  85,\n  86,\n  87,\n  89,\n  90,\n  94,\n  95,\n  96,\n  97,\n  98,\n  101,\n  105,\n  110,\n  113,\n  114,\n  115,\n  117,\n  118,\n  120,\n  121,\n  122,\n  123,\n  125,\n  128,\n  130,\n  131,\n  136,\n  137,\n  138,\n  151,\n  224,\n  251,\n  264,\n  269,\n  306,\n  309,\n  335,\n  336,\n  340,\n  342,\n  344,\n  347,\n  351,\n  354,\n  355,\n  358,\n  359,\n  368,\n  374,\n  379,\n  382,\n  385,\n  387,\n  388,\n  390,\n  392,\n  396,\n  400,\n  401,\n  402,\n  407,\n  408,\n  409,\n  410,\n  411,\n  415,\n  416,\n  419,\n  420,\n  423,\n  424,\n  427,\n  428,\n  438,\n  448,\n  478,\n  488,\n  489,\n  492,\n  497,\n  506,\n  512,\n  522,\n  527,\n  534,\n  535,\n  551,\n  552,\n  563,\n  566,\n  568,\n  569,\n  572,\n  580,\n  581,\n  592,\n  593,\n  595,\n  596,\n  598,\n  599,\n  614,\n  628,\n  635,\n  643,\n  646,\n  647,\n  656,\n  658,\n  661,\n  663,\n  664,\n  679,\n  693,\n  694,\n  695,\n  699,\n  701,\n  705,\n  710,\n  723,\n  725,\n  732,\n  733,\n  737,\n  742,\n  743,\n  744,\n  749,\n  753,\n  754,\n  757,\n  758,\n  759,\n  760,\n  764,\n  767,\n  770,\n  772,\n  777,\n  778,\n  780,\n  785,\n  787,\n  789,\n  790,\n  792,\n  799,\n  802,\n  803,\n  806,\n  807,\n  808,\n  810,\n  812,\n  
813,\n  816,\n  817,\n  819,\n  820,\n  831,\n  832,\n  841,\n  843,\n  847,\n  848,\n  849,\n  854,\n  862,\n  865,\n  868,\n  869,\n  875,\n  878,\n  879,\n  881,\n  882,\n  885,\n  891,\n  894,\n  895,\n  898,\n  902,\n  904,\n  905,\n  910,\n  911,\n  912,\n  914,\n  915,\n  921,\n  924,\n  927,\n  930,\n  932,\n  936,\n  941,\n  942,\n  950,\n  955,\n  958,\n  959,\n  960,\n  971,\n  982,\n  984,\n  992,\n  994,\n  1007,\n  1021,\n  1032,\n  1033,\n  1042,\n  1044,\n  1050,\n  1059,\n  1067,\n  1070,\n  1082,\n  1083,\n  1089,\n  1108,\n  1109,\n  1116,\n  1120,\n  1123,\n  1124,\n  1131,\n  1173,\n  1179,\n  1181,\n  1197,\n  1198,\n  1219,\n  1235,\n  1248,\n  1250,\n  1285,\n  1296,\n  1300,\n  1309,\n  1310,\n  1312,\n  1318,\n  1327,\n  1328,\n  1336,\n  1339,\n  1346,\n  1348,\n  1361,\n  1367,\n  1373,\n  1392,\n  1409,\n  1473,\n  1482,\n  1484,\n  1531,\n  1573,\n  1605,\n  1607,\n  1610,\n  1625,\n  1644,\n  1650,\n  1651,\n  1652,\n  1671,\n  1683,\n  1697,\n  1756,\n  1760,\n  1765,\n  1790,\n  1795,\n  1815,\n  1842,\n  1858,\n  1898,\n  1908,\n  1910,\n  1933,\n  1935,\n  1938,\n  1945,\n  1951,\n  1981,\n  2052,\n  2057,\n  2072,\n  2154,\n  2199,\n  2200,\n  2206,\n  2208,\n  2252,\n  2257,\n  2263,\n  2264,\n  2271,\n  2273,\n  2280,\n  2292,\n  2311,\n  2320,\n  2329,\n  2333,\n  2365,\n  2383,\n  2384,\n  2386,\n  2398,\n  2406,\n  2415,\n  2466,\n  2468,\n  2515,\n  2523,\n  2581,\n  2587,\n  2589,\n  2605,\n  2618,\n  2621,\n  2629,\n  2630,\n  2644,\n  2667,\n  2676,\n  2679,\n  2682,\n  2700,\n  2713,\n  2718,\n  2723,\n  2726,\n  2741,\n  2743,\n  2768,\n  2773,\n  2774,\n  2782,\n  2785,\n  2791,\n  2807,\n  2809,\n  2811,\n  2815,\n  2855,\n  2887,\n  2902,\n  2934,\n  2973,\n  2982,\n  2988,\n  3015,\n  3040,\n  3055,\n  3063,\n  3096,\n  3101,\n  3102,\n  3109,\n  3175,\n  3202,\n  3211,\n  3214,\n  3222,\n  3243,\n  3262,\n  3264,\n  3271,\n  3281,\n  3295,\n  3324,\n  3334,\n  3336,\n  3344,\n  3370,\n  3403,\n  3411,\n  3416,\n  
3440,\n  3459,\n  3474,\n  3494,\n  3506,\n  3509,\n  3530,\n  3533,\n  3595,\n  3617,\n  3635,\n  3647,\n  3653,\n  3667,\n  3670,\n  3681,\n  3687,\n  3700,\n  3726,\n  3734,\n  3758,\n  3762,\n  3800,\n  3824,\n  3826,\n  3839,\n  3875,\n  3924,\n  3928,\n  3971,\n  3975,\n  3978,\n  3979,\n  3997,\n  4057,\n  4060,\n  4074,\n  4088,\n  4110,\n  4148,\n  4223,\n  4268,\n  4311,\n  4319,\n  4336,\n  4343,\n  4359,\n  4369,\n  4452,\n  4454,\n  4501,\n  4517,\n  4521,\n  4610,\n  4626,\n  4640,\n  4667,\n  4869,\n  4936,\n  4952,\n  4975,\n  5027,\n  5065,\n  5081,\n  5101,\n  5180,\n  5183,\n  5339,\n  5567,\n  5591,\n  5592,\n  5639,\n  5649,\n  5660,\n  5702,\n  5770,\n  5783,\n  5810,\n  5838,\n  5936,\n  5950,\n  6086,\n  6141,\n  6152,\n  6153,\n  6155,\n  6204,\n  6212,\n  6275,\n  6307,\n  6390,\n  6613,\n  6738,\n  6745,\n  6835,\n  6866,\n  7028,\n  7170,\n  7172,\n  7190,\n  7328,\n  7428,\n  7448,\n  7695,\n  7726,\n  7744,\n  8074,\n  8337,\n  8442,\n  8535,\n  8740,\n  8875,\n  8895,\n  9089,\n  9143,\n  9644,\n  9674,\n  9714,\n  9726,\n  9851,\n  10029,\n  10082,\n  10312,\n  10327,\n  10419,\n  10424,\n  10443,\n  10447,\n  10553,\n  10738,\n  11064,\n  11835,\n  11912,\n  12005,\n  12081,\n  12133,\n  12450,\n  12570,\n  12800,\n  12836,\n  12873,\n  12948,\n  13025,\n  13040,\n  13346,\n  13400,\n  13569,\n  13570,\n  13641,\n  13731,\n  13747,\n  13782,\n  13876,\n  13932,\n  13958,\n  13962,\n  14039,\n  14471,\n  14562,\n  14895,\n  16424,\n  16749,\n  16765,\n  16815,\n  16818,\n  16919,\n  16920},\n 999: {20,\n  27,\n  31,\n  36,\n  37,\n  38,\n  52,\n  66,\n  68,\n  69,\n  70,\n  74,\n  77,\n  80,\n  81,\n  83,\n  84,\n  85,\n  88,\n  89,\n  90,\n  91,\n  93,\n  94,\n  99,\n  101,\n  105,\n  109,\n  113,\n  117,\n  125,\n  131,\n  136,\n  137,\n  138,\n  224,\n  227,\n  237,\n  251,\n  330,\n  335,\n  336,\n  337,\n  344,\n  345,\n  374,\n  382,\n  385,\n  386,\n  388,\n  393,\n  401,\n  409,\n  411,\n  415,\n  419,\n  423,\n  424,\n  
509,\n  536,\n  537,\n  552,\n  565,\n  568,\n  575,\n  581,\n  593,\n  595,\n  596,\n  612,\n  661,\n  663,\n  689,\n  692,\n  694,\n  715,\n  729,\n  732,\n  737,\n  743,\n  744,\n  754,\n  777,\n  780,\n  786,\n  788,\n  789,\n  790,\n  799,\n  808,\n  812,\n  817,\n  820,\n  832,\n  847,\n  865,\n  878,\n  881,\n  882,\n  906,\n  963,\n  979,\n  994,\n  1089,\n  1489,\n  1634,\n  1650,\n  1756,\n  1789,\n  2157,\n  2333,\n  2383,\n  2398,\n  2515,\n  2589,\n  2658,\n  2713,\n  2739,\n  2774,\n  2811,\n  2815,\n  2916,\n  3012,\n  3040,\n  3084,\n  3091,\n  3211,\n  3322,\n  3708,\n  3978,\n  3990,\n  4055,\n  4097,\n  4223,\n  4346,\n  4952},\n ...}"
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# For every user index, collect the positions in that user's matrix row whose value exceeds 1.\n",
    "# NOTE(review): the row-0 preview cell filters with >= 1 while this uses > 1 - confirm the intended threshold.\n",
    "user_recommend = {\n",
    "    user_ix: set(np.nonzero(user_movie_intrest_matrix[user_ix] > 1)[0])\n",
    "    for user_ix in range(user_count)\n",
    "}\n",
    "user_recommend"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": "('precision', 0.15546528117510822)\n('recall', 0.13647440342022432)\n"
    }
   ],
   "source": [
    "# Precision and recall of user_recommend against user_fav, pooled over all user indices.\n",
    "ac_quantity = 0  # recommended movies that are also in the user's favourites (hits)\n",
    "recommend_quantity = 0  # total number of recommended movies\n",
    "user_fav_quantity = 0  # total number of favourite movies\n",
    "for uix in range(user_count):\n",
    "    recommended = user_recommend[uix]\n",
    "    # Users missing from user_fav contribute no hits and no favourites,\n",
    "    # but their recommendations still count toward the precision denominator.\n",
    "    liked = user_fav.get(uix, set())\n",
    "    ac_quantity += len(recommended & liked)\n",
    "    recommend_quantity += len(recommended)\n",
    "    user_fav_quantity += len(liked)\n",
    "\n",
    "# Guard both ratios against an empty denominator. The previous '+ 1' padding on the recall\n",
    "# denominator biased the metric and still left precision able to raise ZeroDivisionError.\n",
    "precision = ac_quantity / recommend_quantity if recommend_quantity else 0.0\n",
    "recall = ac_quantity / user_fav_quantity if user_fav_quantity else 0.0\n",
    "print(('precision', precision))\n",
    "print(('recall', recall))"
   ]
  }
 ]
}